psych/0000755000176200001440000000000014552221456011406 5ustar liggesuserspsych/NAMESPACE0000744000176200001440000001706614551333702012635 0ustar liggesusers#last modified January, 2024 by William Revelle #added the various imports from stats, graphics, etc. importFrom(mnormt,rmnorm,sadmvn,dmnorm) importFrom(parallel,mclapply,mcmapply) importFrom(lattice,xyplot,strip.custom) importFrom(nlme,lme,VarCorr) importFrom(graphics,plot,pairs,points,abline,arrows,axis,barplot,box,curve,hist,image,layout,legend, lines,mtext,par,persp,plot.new,plot.window, polygon,rect,segments,strheight,strwidth,stripchart,text,axTicks,title,smoothScatter,matplot) importFrom(stats,aov,cov,cor,cor.test,var,sd,median,mad,cov2cor,biplot,loess,predict,predict.lm,rnorm,dnorm,rbinom,rlnorm,density, kmeans, lm,lm.fit,loadings,complete.cases, na.omit,na.fail,nlminb,optim, quantile,qnorm, pnorm,qqnorm,qqline,qqplot,pchisq,qchisq,qt,pt,dt,pf,qf,ppoints,p.adjust,optimize,residuals,spline,symnum,terms,weighted.mean,promax,varimax,uniroot) #importFrom(datasets,USArrests,attitude,Harman23.cor,Harman74.cov,ability.cov,iris) importFrom(utils,head,tail,read.table,write.table,read.fwf,stack,example,download.file,getFromNamespace,untar,unzip,View) importFrom(grDevices,colorRampPalette,topo.colors,devAskNewPage,dev.flush,dev.hold, palette, grey,rainbow,rgb,col2rgb,trans3d,adjustcolor) importFrom(methods,new) importFrom(tools,file_ext) #importFrom(psychTools,read.file,read.file.csv,read.clipboard,read.clipboard.tab,read.clipboard.csv,read.clipoard.fwf,read.clipboard.lower,read.clipboard.upper,write.file,write.file.csv) #importFrom(foreign,read.spss,read.xport,read.systat) #these are imported in psychTools S3method(print,psych) S3method(biplot,psych) S3method(pairs,panels) S3method(plot,psych) S3method(plot,irt) S3method(plot,poly) S3method(plot,poly.parallel) S3method(plot,residuals) S3method(plot,reliability) S3method(predict,psych) S3method(residuals,psych) S3method(summary,psych) S3method(anova,psych) export(alpha, alpha2r, acs, alpha.ci, anova.psych, AUC, autoR, bassAckward, bassAckward.diagram, bestItems, bestScales, biplot.psych, bi.bars, biserial, bifactor, biquartimin, bigCor, block.random, char2numeric, nchar2numeric, chi2r, cancorDiagram, circ.tests, circ.sim, circ.sim.plot, circ.simulation, circadian.cor, circadian.phase, cosinor.plot, circadian.reliability, circadian.linear.cor, circadian.mean, circadian.sd, circadian.F, circadian.stats, circular.mean, circular.cor, cluster.cor, cluster.fit, cluster.loadings, cluster.plot, cluster2keys, cohen.kappa, cohen.d, cohen.d.ci, cohen.d.by, cohen.profile, cd.validity, congruence, con2cat, correct.cor, cor.plot, corPlot, cor.plot.upperLowerCi, corPlotUpperLowerCi, corFiml, cor.wt, cor2dist, cor2cov, #cor2latex, cor.ci, corCi, cor.smooth, cor.smoother, corr.test, corTest, corr.p, cortest, cortest.bartlett, cortest.jennrich, cortest.mat, cortest.normal, count.pairwise, comorbidity, cosinor, cosinor.period, congeneric.sim, cor2, crossValidation, crossValidationBoot, cta, cta.15, cs, d.ci, d2r, d2t, d2OVL,d2OVL2,d2CL,d2U3, d.robust, distance, #df2latex, #dfOrder, densityBy, describe, describeBy, describe.by, describeData, describeFast, diagram, dia.shape, dia.rect, dia.ellipse, dia.ellipse1, dia.triangle, dia.arrow, dia.curve, dia.curved.arrow, dia.self, multi.curved.arrow, multi.rect, multi.arrow, multi.self, dia.cone, directSl, distance, draw.cor, draw.tetra, dummy.code, eigenCi, eigen.loadings, ellipses, equamax, error.bars, error.bars.by, error.bars.tab, error.crosses, error.dots, errorCircles, esem, esemDiagram, esem.diagram, #until we fix psychTools extension.diagram, fa, faBy, faCor, faRotate, faRotations, faRegression, faReg, fa.extension, fa.extend, fa.parallel, fa.parallel.poly, fa.graph, fa.rgraph, fa.congruence, fa.stats, fa.diagram, fa2irt, fa.random, fa.sort, #fa2latex, fa.lookup, fac, fa.multi, fa.multi.diagram, fa.organize, fa.poly, fa.plot, fa.pooled, fa.sapa, factor.congruence, factor.fit, factor.model, factor.pa, factor.minres, factor.wls, factor.plot, factor.residuals, factor.rotate, factor.scores, factor.stats, factor2cluster, #fileCreate, #filesInfo, #filesList, #fileScan, fisherz, fisherz2r, fparse, fromTo, g2r, geometric.mean, harmonic.mean, headtail, headTail, het.diagram, histBy, ICC, #ICC2latex, iclust, ICLUST, ICLUST.cluster, iclust.diagram, ICLUST.graph, ICLUST.rgraph, ICLUST.sort, iclust.sort, interp.median, interp.quantiles, interp.q, interp.quart, interp.quartiles, interp.values, interp.boxplot, interp.qplot.by, interbattery, irt.fa, irt.select, irt.0p, irt.1p, irt.2p, irt.discrim, irt.item.diff.rasch, irt.person.rasch, irt.responses, irt.se, irt.stats.like, irt.tau, item.dichot, item.sim, item.lookup, itemSort, item.validity, #irt2latex, isCorrelation, isCovariance, glb, glb.algebraic, glb.fa, guttman, wkappa, kaiser, KMO, keys.lookup, keysort, keys2list, kurtosi, lavaan.diagram, levels2numeric, lmCor, lmDiagram, lmCor.diagram, logit, logistic, logistic.grm, lookup, lookupItems, lookupFromKeys, lowerCor, lowerMat, lowerUpper, make.congeneric, make.hierarchical, make.keys, makePositiveKeys, make.irt.stats, manhattan, mardia, matMult, matPlot, matReg, mat.regress, matSort, mat.sort, "%+%", mediate, mediate.diagram, moderate.diagram, minkowski, mixedCor, mixed.cor, mssd, multi.hist, multilevel.reliability, mlr, mlArrange, mlPlot, m2d, m2t, nfactors, omega, omega.diagram, omega.graph, omegah, omegaSem, omegaFromSem, omegaDirect, #omega2latex, outlier, paired.r, pairs.panels, pairwiseCount, pairwiseCountBig, pairwiseDescribe, pairwiseZero, pairwiseImpute, pairwisePlot, pairwiseReport, pairwiseSample, parcels, partial.r, phi, phi2tetra, phi2poly, phi2poly.matrix, Pinv, plot.psych, plot.irt, plot.poly, plot.poly.parallel, plot.reliability, plot.residuals, polar, polychoric, polydi, polyserial, poly.mat, p.rep, p.rep.f, p.rep.r, p.rep.t, predict.psych, predicted.validity, principal, pca, print.psych, progressBar, Procrustes, Promax, psych, phi.demo, phi.list, psych.misc, quickView, radar, rangeCorrection, #read.clipboard, #read.clipboard.csv, #read.clipboard.fwf, #read.clipboard.tab, #read.clipboard.lower, #read.clipboard.upper, #read.file, #read.file.csv, #read.https, reliability, residuals.psych, reflect, resid.psych, rescale, response.frequencies, responseFrequency, reverse.code, r.con, r.test, r2c, r2d, r2t, r2chi, rmssd, RMSEA, SAPAfy, scaling.fits, scatterHist, scatter.hist, score.alpha, scoreItems, score.items, scoreBy, scoreFast, scoreVeryFast, scoreWtd, score.multiple.choice, scoreIrt, scoreIrt.1pl, scoreIrt.2pl, score.irt, score.irt.2, score.irt.poly, scoreOverlap, scree, schmid, scrub, SD, selectFromKeys, sem.diagram, sem.graph, set.cor, setCor, #setCor.diagram, #setCorLookup, shannon, sim, sim.anova, sim.bonds, sim.circ, simulation.circ, sim.congeneric, simCor, sim.correlation, sim.dichot, sim.general, sim.item, sim.spherical, sim.minor, sim.omega, sim.parallel, sim.rasch, sim.irt, sim.npl, sim.npn, sim.poly, sim.poly.npl, sim.poly.npn, sim.poly.ideal, sim.poly.ideal.npl, sim.poly.ideal.npn, sim.poly.mat, sim.simplex, sim.structure, sim.structural, sim.hierarchical, sim.multilevel, sim.multi, sim.VSS, smc, spider, splitHalf, statsBy, statsBy.boot, statsBy.boot.summary, summary.psych, superCor, superMatrix, super.matrix, structure.diagram, structure.graph, structure.list, structure.sem, tableF, table2df, table2matrix, target.rot, TargetQ, TargetT, test.psych, test.all, test.irt, testRetest, tenberge, tetrachoric, thurstone, topBottom, tr, t2d, t2r, skew, unidim, varimin, validityItem, violin, violinBy, vgQ.bimin, vgQ.targetQ, vgQ.varimin, VSS.parallel, VSS.plot, vss, VSS, VSS.scree, VSS.sim, VSS.simulate, winsor, winsor.means, winsor.mean, winsor.sd, winsor.var, #write.file, #write.file.csv, Yule, YuleCor, YuleBonett, Yule.inv, Yule2phi, Yule2phi.matrix, Yule2poly, Yule2poly.matrix, Yule2tetra ) psych/data/0000755000176200001440000000000014551520565012321 5ustar liggesuserspsych/data/cattell.rda0000644000176200001440000000102113000516317014417 0ustar liggesusersՔOAl`%R%PH!ۊ(h"E1qjjhCKjjZć@Fxd}F'3ɔR[%0nsRIklO1RĞwI_+Ղ>y>_dW?J+5Ϭ[6n6%~lݍEO FK.Re³Ț5c >999'y^xouþeR7:֭|?صy]ܒmA7/Ug碑%.8yRv0g/:\ꐇ|6p.}Q~v]OyoR ؐ/7<syo!__'=%,zԾ:Rϙ>e+Q1Nwxzrq4i&J|y:;])*H?ժ oo8 spsych/data/Harman.5.rda0000644000176200001440000000060211662241242014350 0ustar liggesusers r0b```b`@& `bN H,M3e+`qzv^?'Aa4:BkYg@ଃ"m py(UAnqD?fA1IW@Tq @hT:CӠ :* odAꏃSPa}N@,@\ J" <@̊DJsSla:Z<[HQbr! g3AᙢPx(< %k5:*U_PX,NK)I-ɯM+姥5%Te%攦y@ tpsych/data/Gorsuch.rda0000644000176200001440000000063011544510615014414 0ustar liggesusers r0b```b`@& `bN fw/*.M)i\[vnߣPAo4_=t_'Rpac03~ } Sv_9f?1  ,fyP S翀j?_0́-A a/9pDU磻Շ0*PTۿDUG}0>}Pa x8h`o;;,n_5-0d)^pc``%@%y@0TT-H%3/-f+K-JJ́8s3ZTf2qIj^qf~LdzRfhd g >a3Y.psych/data/Gleser.rda0000644000176200001440000000042612262550455014232 0ustar liggesusersuRj@ WCL G =iSڂ]s\9YUSh5iVCj JNEKnz"~]hEp5θCqt9 :tUC~\SS7q5|?/@^{Tp'P;g7tWiWwSsƂ.usdݿwѾweWV+&ckcn5vk]ATym[/yԝ~$ÿ&=>{d^U*(Ucq}\>GfeT2}7iӌ7qqUf̬ղ,%?0b7ey2C]}wPfl鳖uթAu#&mĎ~dx.N9֞S_Rrϴ"O }Ii>*zM^=Y~xZ6:GH8I: u5Z85_N[ӹ-U2v%4j5/=G|݆_5r5ωCa!9}Xe3u%)k[%gs߿z'KG|ss`* n5 ie Aoh;`Ige ʎ%87;lwG|3\3i? u8V3ݤSN 'HtշoC[Qi C_{sQ74]/yjgG4u2詗m0'?#>A}EN@IqO1-N9^!_ȧ"HF"c+<8ݱhR1 _s:/uɺhgp^ y[(~Q/:Ԩ26/G˴uj徂߈?tr;iHH8zL~sC?_0 y??ַ8_|V/n3ʕwWk a'wY崼2w8[?7? ?!c ?Kg?&gހ>?puBEC@?7_4o7G7rMoR}67[nY3|{kuC+2dYWu4FCV̎ytdgXrp;w!3}Q‹"On7C~sہϻ*ߨ~=*lYu<!7jtҠcYޅv (JGRz,ǖ[~;bVģv8?7+ psych/data/Bechtoldt.1.rda0000644000176200001440000000234311543736571015066 0ustar liggesusersVmlSUcV`?b #!ĐE7ڭwpj 1H!QLAd!`ck5e.`? s>gck}yӕ++.+˕'N_ğ{ē+5v$|13nԇϧk[j\)KM77R%_w ^>-9Ԝ=m[ˡ;6:YG.oA݌W0)|5q͚R˪ӱ;tVFS9:/~:}(*fɓЭ Mj=T #+|jP;Lɧ7c[e'tG<4].B2(!nzԯI _B?tHvlq sw8cp; ?e;I& cn6y7 5v%tC2oEok_ǕRڒDDR~h|ēe#ɼƇdN{(9w)z诤tU?C?>@jr:>GdX<#WxHzo\=Se`4nL;bs\>7z7įYXꅅVkѸxdUA?ϰvc4H!9*Ca*Q;?|sIiAj-wFשT:iwJo??C8##v+E74뢃q;OןGj_c^? >Π?֡ʬ[;ߙ)<9=^RG4Ƈ~~r s+/|[йp!XQ@Tfp߁^A{˓ܹ@8O0# "y@aܘgunp/a-=Oyug`\X_K :߳V؛2cŧ`.͡EOsюo3GC?8E~yv@bx 4>)O_ES M"hu|=9gyEϯ75[W_-f=#A9=;֕;g/D2vցvw,>:WO.>#yD  atj:GK5uHjRÍD曹2?777_@G:½scy7~M"CIWrբ od TQMں)QFנ)p&UuNGTT@]?pVzzwvt\NR2腍C^NM:H=<0.nו۹+x7u0MϯxpUiT] MS}d8sKpsych/data/bock.rda0000644000176200001440000000143111322775716013731 0ustar liggesusersKOSA-RD]h" q^qEܕ GMt?r鯠=-Bo/g~_qܹ3gO3<:o, {Z}r:GDu613ibIGHuر b4bw9َ8[ǗKݼ> Z{hK-O;_NuG~/VimEmY_h@0U҅/u%{o/}M;Ap}(Y{vbFE`w7H>MpsKWv paa-X|˶1߉Yans9.ڵx쯁w?~~yOA+}HCI>c- 8v s?~~y+Ծt;OmdS0釙w)o:W@ ps`~0pz1_0`?a34~`?XaA_ʜ xivєp%Pu`uдW T ONL*I,9䤖dAy2D'$[-*_.MKˬ[ UZ  HML/J @U iaNR{psych/data/Harman.rda0000644000176200001440000000125411221723761014213 0ustar liggesusersUOA^[Rxŋ=7#Hc H;$ֳ݊gz3gx 1b@ş/xgd$۷3}޴2>}9?Tu:Wx(1PKh;3 @ML&Mhn{AP Ǩj1K+Zso̽͝Zs9߷',x۱#wB/?kTT%\_|C'9BM?Y^ƺtK*'Z]Yvo9]dߗ;RʝQRwZWRxw:]a|TύRlčr8o_%݉r.toiJ9X |{!;BkFTR}9+MaQlT/.bޕܾJjg%goߍV/dm% JawćJ|rR~m58dk6虿d2k&_tfnsx]ݝ~dtz!۰ٽvfm+eg96gfmߜ9&{%t%␽-x,uMy.J?9Ry).;ŷ6YuWzTF6{9Kq1_Wrֿ2gWv?OBxn)%n ɞ̳l} S%ˁRsn2UE9?JP;4x-fkHNJRg]W|r=s4ͽw .d҄qk+҂gwښ9iDj#|m v}LtWM ӻl~;gamu^;?y~56;6֙n~yKCɰh2]{ϝQ8(u3UvMt=0hYvm0nW1o b|WJl C߲-!r~[^!|=!w'UUK᮳X0wn`U%ڡ% _lU-!-D|PvA1C8\?^R5Umk0}M.EҬL|[>"[W1;CqXz^5Qo6M8:{vjTz;tjQy{~w{؋?_'3krDKj~_*~ =.^חgVn-l?kZb?UU K{>͵3\ Id ϧ@n# k^jk$knOezU; =.<8]bg$kpnOaἪ9'Y=lknŘ(Q)/V \ؗOpNVn#O·jk"D:"oFksk0gznZܻ mέ˙ƞ6q49haoL4. /zC0|5=mۖs0c[jǕT kïs1|N,֪U n>${Z*o1'Reg^7]5]W]~VgKo9{k'c,ӸbHmkw\7?>k8/H^u@g?iD6&{g K P>ywŸ_5oau0]UΆ._ Z9#IWͷowjCSqhs̆+gT]yg&OH鶝y%|gtDS Φ[Rąi{qjWw 7k7{o]_(}v%!=OS{'kn.v;]ڒw8bn^s:[4ߨW㔼so*Il1cgk[T[m&J{'CYb\Y\|8jVͷa*ֻV#Sn; C[߄뵡83kM{<b;r|8bOk+܇ŜWa럆e?s*wOXgn.Zg:_.?[NX'Gc,ƣ#ĺoBNJ̽߇wMm3vBͽT㘫?b~PŹdv&oܸR%-O<2\3Gv}GuA8> %~t\/bIU;e⒋/l3rvk\>𖍉[̮V~}<|bj6q-q.o;wpWU=xfgur#37ɧu|.į-Wt~ӧ׍Sa] -1_ `l&.{鼮k\}'ةP/8^d_cۯ#w#D#ps<ȏ"B:1=D{;9"B> Iwrv"%BEĆ#v`~idvaK!u2B.Yv\΀љC/A3DL2|+~a8_g5nэ-+ oEL76uằ0ߺlnلlFdSaAMm{b{83z݃=zȰ/D=ې})!EggBA#qC>Y2!|lpj{^ҡ~­%z?C/}/swR~A{c žsgCH}T>b_}}P~O?ف~*5ȗ!t~T?_8s.x\ ^\ V8!`w4!\o3ȋna98=-t:6P#G@#pc[F~PFrfCD-D.%DD-~ KY=(>JYQ:(G(%Q%G{6J!ѿBQrqCu)Gϡ3GQws@9t'G~ȭ)785.9H;s)rP3x;Fs F16F|r~cԈ18|ɓq}6Nލw"O g<q4u|!`0_; ؃nkl@`^NУ&5rZ>!`rO6"tLR'I3~`r Akiix>IOMR&$a {i>)tNS߂)^MXO)|Ɩij4uuLÍi1Mmcps+86M=F4xM4B^Lc4ub|'4`v!3]6z 0g{3|7CΠoBY%g!f1K%VYj,5~RS.3\Ưebur\[|nԇ..۫V-(+KgFE[vүORWіjʞMnQ`SrhgHi!oQLr f][ /7]9Ge4/PyeُSMß~XwyәyާS^5@-mA:xl>}:N:W)?ƛ;EtUfY02_3tIgHNly#^7#aNg{bq`?>y^y&&bFwa>75g|tc-߯J[H/?x7|$,c!vO%G=X,XbZ?GGj}sb)B to SNW?ύ)C?GGպQj[Ay;b}e]3n;}3_RK4/Ƙ}߱~Ϡ3O'd*89'R|GC?GG<_)gup/|/o}G]ta?k91d_C?s}1ҸY#R.פ%9bI_cԉSx]>xL5:b|њJ/2p^.T+!#őڐa0aۆ< 19uz'aZ#sQsl^I4d!?jtՖa^2:|^2W3(b`"ZfEf8?{)\@ psych/data/Reise.rda0000644000176200001440000000267313015377604014066 0ustar liggesusersVOSw}RR!( CS'L̈kę0 2A҇ҵ+p C΢`dm$*&ZZTEN&U/_9s%JpjT \k r|tuqBlǻ[)7߁xsz& &0u7q@W/n;-~ap'dK#6:U' 㬸 ;hsA4'*Z-\~92~~S% ǿ;qW<1}-:X+Ydz`_iI$ǸS=Z+ KrmCi?KJ=~ +#Nߐjv=[}䨣ɱέL<,w5 CАC?Knnspx#뉤pjrpQ=lcG@WDkZwMiݴ{ܑT~^} gq9qB0dQCNmB`j8i7PW)pǘ:#@b]cU:%@"/E\q?)Dr dlj*M_/]QQCi~ :".8nkcQB\WB~x@Q;׌[س;80Te͙"n]+ڣgΗ鳳^Ej{<#ƺtwQMtj+.ក^?Qv/hг_QW8_3֋Ao.`L=g~ st{Ad(W>8EL@;#Fr/b^e|CxLlU:3iՆr=nK@W=?nK VLnGb^E) 7\y ` eЏJq`ثcNb^anGg"X yӱErgw9/yҹ+."˝KwP %x,Bf)#R2S\*bgf^LAʋqk6)f勐A6_*k22x-!3 2& E|NRY)F77JRRV'?כ.] psych/data/Holzinger.rda0000644000176200001440000000156611543737625014767 0ustar liggesusersՕKTAƷu11A*v}q0HP3 1 BBLĢ L+̊XD4fED,4XҝqY?ݝ;{v~s󝹇"buα*JR*PV>)rv<~")SM0ь_4iXo*{@×.^.חm?V@E)kIruْHƩ|_{o aSd8o͸G%d^w| m49RA 7"lμC,Z4)h22l up:t! {\D@77L}'&KCvXjg:iia+zve=Ze.& .t)ocR;Sh٪7&f)wlSsԪ]>@}ܫ(^Iw,n{5Q ZG]9e!p5 NDja jkaٗ #[5ݐ8Ǩ4ȣSu=:'>%n,3xE|WnodݙZ(z|@7vt&{'7 u>y>E|@7sA6ZXHy<@k@78@}npǸWsy>EotC>CbJ1*OW8w~WGkH'FMr6h]}*:0> //a%io{psych/data/Thurstone.33.rda0000644000176200001440000000120013015377163015217 0ustar liggesusers r0b```b`ffd`b2Y# ' (-*.K36*`{׋%x?Wvbe ?bu Gk8 ^V?by#kh[p0nZs'Svr]x@/F7]-ޛ?P`v\l>H/p^*5کmT9?`*UuZfǕfރ waPHª6/əcYCfޕM+'?z\٬+@ۿȸݽ~PfuOB׋%o۲=c9fxa{zӿپMq+n,a?0aa=zRV %Soh jU ߀lx5/2@+|?~xz'p@U?*`'~ ]vj]Ku:0.zC^XG:%|c }VEIUe3qy G]qPw}\A_]_9H~p)ȅ4 Ǿ3Tg>9ѻ۳IO2N:<7i=S5u[/w6|mrelߝuۏI-z·r1'==|V|Ԯy m8>FUyWKG~Zs?l"^uԍW }~zȕ/ܠ?-"U?W!nh[d|e'5w-9={OrpuM[W:+AT>,x2PsW}hbX?/=wt^񐧩> %v= {ᮂS9RuO!_%/ ᬟu*~/rrF %%W^#^hO/ΙGW1ta'^8Nʿz{7DT~M7Y7n6iY̊#+ym;/杗n9W8g2w>f>6Ef*ڲ%3a3>˶ymy㹱7óm>iف9u-3aJ2642)|có\p\zW= \sM-:EggSK\ O98mmS>>:=c;m C.=p]mn=,W;bPRc4yg܄5aX\y{W=^6Sz?tcm53<ȋ Q;ɫ_u7%gccpl19%[.Iop雺NZu"G>\r=l^:|%Do8λG\υqbۮ/_{ߞepNؓ#^` YoIUyES:4'"yΨ]s[ִz9_z/y] >Vîvjkp>Q }aޚ-i&Tc}c~:ǸG{K1\'[1>s>$2O3gyϬ_ak?\]7p}|\hZq]zaKHp1NLz*}8SI nJ6ݏ"\f{w^sk/̗^8WҚsy*y@rk =ظvsj=o$Ƃךe44cH^֚|!ja_M;+=bvLF/t-d~Oku>cyuu=!̼gWsIg]=4ig?؏&2wS<Ә0c=xKW~$Yx>+F\'x&`؇yio^ѺL${s]sRcwEX$gSr r3͗kNgt|#=-k' ץ5z3y5r5| łukd75MsJ6/Y[sQWyM4şUK7鼗zjX9)/½|~y_x1Zc[%c9-\xm]$=&.7Oj 5?zؼR^p'Gq=(ՎgE:3罎crQ'I=>'M^?H5N{߸n<#cWNϲK~4X$OkL59ټ"xX?~c%q>:X'O_OskG%\ҙ!"Ŧ'tFY~j?jϦ3+=5Ěy踶x~L18#Xyzf\^=57<^ȟ sՋ clc|ؗyR "I90/٫ߞSy;{c9$̭]ϕM~ᑞg Kn٤]b5:.ύa0'IۮS^JǵXgY17?9#>NZ0}r-Vn{zcɇz<6q/H~MX^}+zyo]'gٞP>Z4#''9Q)iק|~9_C~?Gɫ6'OT>\zߋ0sM>^k5<վ}ה1"/j%iHO+Iwc\#96\:osILj\6nu=p>R xvM=T8?s_k߁<[bls_,t}>c%~zie+֛4$OO%1j>v~=~i~6 ٨߷[WN'CI'N9^Ik#z{Ǭ_|?帽WqHqzuػ47ןioX9) c ڼ{O~潯[3}gy!)AL<[}!)ƐC#VŐ@_w<ϼ'0S:Y(cΔy_y,˞:zzV9ƱJH9i ޘ8V<{2x^XC{1^=3O8y=ƓZe=l^J:oO5Ͼ9|=oku~}GF.B<~֚rJ/7yƷMC6Ga K>asny"MM1鼓oqrsǹy}.z և5X/x~a~\Mx55u՞g!㐰bzؼRH^ǥ6?HoN |x>HC;ڧ<\wɛfx/8_yDi'1ݯs3Cy}&ҋ1g=VZk֯3pgFڃ̚Y<٫smҞ=syеb9.{y>=>[i:Ⱥou_Jm|E|Z^'{{H<9aԨϴ~8{v{yP*yw q {^'|}=}:9枮-c5<{ޘ >xZ\c;ߎt?! ˟zؼ葬ɞ^`1RK}yNG=Tzwww6bj֔qrٛzؼwio-ch7ՌkuR9=u΍npEة/^1cybߧ1R^IϳNnݟo<s腋pL;֛^g|؟LR[I%}ώ/r)N1ǵ4ڇ[l{ubX%<>xy7zu1K{wCN:ǁf =}cȚ8ԋ0xm>w7e9յ<_q?xIi͇d{^:/O%<[C:G8,;^ksexIuMaJ|%5RĞ˭=^moi|z.s|gi_lor^l&ͅCڷ{%^~s\Io͙GϗZPLs:x:2^_ZOc,OmW-CH2_WomײGZ"Ģi|؏9Mq%f=udJv͵k9}j`?j|[l\琼~~;oO5w:=\~,^[_Oxqgr?e^99{7kߟ>79j27YcAz/T y>z5[beG>yEmO;u)ɛLM1k.B׸g4}uzMX~c{#g3-񲯛E16^78^ޱ8ce]ϓ8g^s]XSkԜtNhX>+ j {}͉\N1s nľ #3i֟wrx{hDy }>s~Q]z0kty5kWtǼfH:>QۧgjֻkkO.9~;{~xs|%Gs88o&?Z}i>boaj {O?cRcN~=g7KwL|F4%)17Z'mydo%g{Kg:s]mkbX:i >rXu/tㆽ$=ެ+Hn;{}ג|νz"{͸N |dw*1놱)Xzsn̊ό\8aI3~8\}{!nKc9㸹IcEO{+㥯zN]\rXa<1{ b>'6w3[49_&j1je>w-XK1H.%/VW[{g1='{AkkzY6|ý^:Ig+|`g3|zGs?3:;<}p}ĉSr,&w:3jׄ4N[X9ϸ^uLxO:f-C/א'๕M<<\qg"K\O <uf_xjikXٞœ]I'>riC)HM ]:?Kn3Cg8U7y~:+H1q<{gvc<_r>bbx{UO?􄄿sX񴎥庨Cس{\1'[y$?[wf8'&=y I\7su4sN8z' #rhzs5}9oS~be}q>6yQ7x8f+}^ tloܟ:+73nc/7KIj/R-gZJ0n#7<.y5񢆝5#Rjx=?)FalR:0Ѵǧ~\[Ww ;GYwz>zI`,};/q,9zrLTtF%-st2& =$2}O4t6pjtiHxc7菮}fa}~z{xƹP:8~Qin=l^Lk֫ 糖=L̈́˒GdɵSLQ{IWi5jg -xCι=߷Ø-]bzZ\P= 炔OK~u6^cfEoA]g\뎽ɓ 䜞:?&]o{߮Wڳޛ։׋ =nLF{ZtmꜰJ>P73F{x%sHwDžI'nOҼ%!Fͼл{ѡF$Att{Rs,|1O~j, {zؼ>2֢מ|)>Ųi٩9~fU%O/f=ΎmM6/e+Ob>Nu*=ߑKKt}6o`.=N^5=cnv(%Y碞?40oO:ZSxg#̵=NYKsz8'؇}k2oUy\_K8.Ɛu qgTӌ9Rgcc/{SxY}oʻji~#Πtx׫ITF^kt5do^Ҝg;7aJyJZwԥ%1`†aӞͻc.Rqt=u=aÒ.zzcԊruxŸ^|Cg&i=?b?}9;yglRɃ s.{ދE|NNziq~{{^! G=~5pwoMMYg4|90}֙<Ƶܝ+'E=mqHyz1J};^{sy)a?{ϟ\_ڣ{]5LzqJ{^@9|jk;q`:`6/,Sg\R0k/7y4HOz; I܌5X0j䤋sqxsg%޷m96/U&7x)Xܠ]%d'1k^;xЫױ5j+rf'yS Ln#jϛ}ÉKB}'XW⢵qM_)ՈJxIGsԳ?92659KQO;Zz%jNNiaԳ5뮓?[ zNYcc=u9BzBN'Ǹ33o;yq-3ꗤ>ߚ07ؑ#i}朵kd/Iɇٿ3=ܑE&ޤ;{ABk'M}'OubxLsX)8r.?η/R kMz[*j/whOHЗ%﯆qݷ}$w}eLfzc<ւcXfh2M9j17猡=J|U,7uVaQc{ɟ\s>E|ҙ3sgњΜMiRwɛ]_4s{{?9gs.ꭴF/1fƂ<2f|ke/c9ۧ8_N1tf)q!=Ş7{|~Mz,{y~_x~}cOd<ցݡz&ic/ɷwbګFǷ k{ Ͼs͝^b>xRw-+Cqy#sӤuL[3Wz7s:kK%\?WsS]87{^e?fO}+^WsH30X#;#ͅZ0O!\Qch.~S&{g}}S\^9ykZd{;qh{JcK]B: bm6{rCRXsrR}7=~qpIo<#i}o5!aZ43lO$1Vy%1}fu{ׁΉ{%]%-7w#i~6y {+9}<3^g~zڋNgaٛ r94m>k͓sOrLlS9~U3=v͔z|'\ϝ+u0\7#]kO1>f{i:xzzX%o1I\O@N99gi>)78?4J}$kꗱM =8Q3ew_igO^]4y[wķW9'[so4͋枇vLy{㒸g̬ccSǍ>d/'6o~RTߤTz3o/ا}۔GSe^!`I뚇6vM'Rtc2Lϛa==.5m1Y OK\c8zﱇe\K=ۋݓ=v{sĊq5ǍaҐ`\z{WVa\g:~zIgm>c[Y1 uxذ/s]H|l ~_K9lHp2|_o=kx(kĵ>8WR<Gpn g{݋u[sg-店֯mq]>$k}q MT;ߘ):ٻjxGOM2'2^s]3Oy3}3\-n4yZ9gäύk5u>qzi-^П8%Iǘ>w^c.p4^縹~g9ץe~ߥ2s9UW0/Ń4¾x?Ǣ^C40>os]}Muk"s{`kבxL__R[+c4s}л싎2C=?exc6ue3{{5ؙƌudlOϗI c60y/GOYLs-ؿl>%N":N֏}5Ոm7Ⓘw1_W:e7<ߵF4I CzԘ H\u? |{s>ٞ9,oߠI A~r'5ZsKTY)=qlizbx-,kr[?y|c]xHL9^;mA{=|3ar(\KM}.`|;>ºvo8/gӮk7~Cr>&z<2v r>'wX!3I9Eiq>{yWK.>e{fz6|﹎ڜꙞG;cd$g\臚pmѼlOH{T⦅p3ǿq:\Wkgds_/<׮yBambw}GBcob6/)?]?0sݞo3{^r?3961ɞfr{Mc qQ889װ2.>wY"f84G:=3[5(uMIoiq`6B}ώ\}3{/&cd5s|}3#]/Fƚs2fC5Xvi-9wăs!?a{Zoޅsvq53]YGi6/j}U\STGwa_c-y~|/@]1NΩέ|q'Wu0V409w,Xna_=O3coqMZAtAzo$unN}1r#/m]<{\Xq3asN+ッk srN#_Vh-}Zmi=ā5N󜛱I򍺢N__rq_ޫG\czg$On/՗soZk[ς7eqm3yk678oJƍ<7籙WSCƗq?4Oe|VdzCZ ŵH<ΩTO 7q>->O[wg2_bog̛ i_d<]s/ƎtI@]:3fx8[kcGaD .܏ԏܺW&MgqLƈ\glR-; 5NTϲϚ'M1wM=1O%M9"m.Z}&ރuYĊ82ޫp.dLX>]' uk͞`L1!ׁ-'Źk^Jzʘy5qM^G {|M{=Ϗ^.}/5=>jy0[}IF{}µ=)jIi{z:͞~˶1őa?^\;c{{at]9bbXV8SZJSyX8)ߧz9 = W }+:ѵN5#{7yb^pL 5br۵09/56ezؼMΧ^X>Ϩ-|K=YsxKAx0YG-k^-'Q7RGI=Yι+WK:5sU{qc?Ǹ.I1.)/'> cޫIO'rgo}t~(7ܗxJ4\8[>syL 7s95HWЏKwk7k|Ik:rj1s Fn2qԜsM=un4ڜp}{;G2|eߞ\k75ΉuksNΝc[ֿsO#c`b{x~gƉcד:4'xS6s;yIϮS]oϠ'8U{c@=_-Kֹ5Fjk߶XIۏXkj$OX[[35ks#jeϪxlk's[_t`0,Zi,AӚ F[{ [u`mϱi㺄kg֐5@#kHqeΑ9A=øw{֚qa`x~zqRNԼs9N\0w꟱}g{ o@N웘8_ǍZc~OG?;g/qgyHo{r_ϧ|Iy9f̧ϬُV؟{嵧Ϥd #e{Ԛ8EiIi?ƜLkqIuϡ}?XmKuœ6/cjNsMLknfzkacg̓w{sY%?Γx45͚ޟϻsF?H6FEsqnnG}Ɛ5|~u/ԗ]צkݹr+Ǎ=3$ob-e_Z0W{]g1jŽ8q>ĒjN3kܹkX5qvrqeR}q0|Ξir}֕%A#%.a1H4j<>s1ƭ/Ƕq%c]Y^E4 'wרg^}V73zߚGc~Kd>2_]Fj6?מ^7Ӿϵwq.X0Lr33j9֫kfi\uwMs&7VW>3kyg Mzؼ$ƌ(s竌M%i|g s 5Y3i}{u5swac{=YO1^ 3cI#+$gg2Ɯ릮Xw7kXs\[;3ݟ||X6=9!:;ǚy~g9OΗygV٫ 7K)<Ϧz1%v]Ɔy:158O䚽ӞOy5xzL91o3'qԱ9gԇ=xB1Es~1)wcNFp=kz1_-X3'9ozA; cTsc~Ы1Oɗ6G oK\|Cg3cRkcXW{u4nշ{Ks*~1<Qb2uZJ^J$Oi|b}qKk#y*cZյs`O7By5yu 9. CO?r1p}g}Rw>/,CK~kجWs.s4X3]osbf#sxr3Z7AGtZ#44׵=qsq\s9=usB36M\_Oko8ni_qOlNK;6֪~zƜ캖^Ae.kcYk85%51W?^smֵ6/k8_,x^=qqv49Hޕ།U3S|K~zw'^[Krc߰n9=ygy=1rNׯmă:o4w=jL1y悥gn۬w]rAZ9L.JtMxfc?}u\NxӬEIm6uѵ[ߜwṥyxvD/wB/gNS{^wý3֔hsncLϓg9=Zhb{Z$-5s5{mX[7>7zKҤG>ǟ5׻Cğsq]5YshkϺZTZS|؟|xGS\Oҽ=|~iw7:OZ1O|.ξ8/g36us\=K3zؼO䁩s)o1=kPC2o#mZևc,ϔӵIr:e]ۃrIږcS_a=6ƘKœp-rlױ=SkƗxP{S|mS|\?8ܯ=Fͭ'A},XzK\03=׫Vv(f2]?nlN.|1an*uukϭy`-؟R޾`G>Y~7i̔?#MC T'ྨIly}15/0f;V^ٺ-j6ֿF0reސ5sk1ӕ#Wxk~4e!Yk{߳%w^36|ueoaJ̦8N#9ĩEl-y ľFbX\4'Rx/3M]k6lΓ䫟%r>}3ysXl]l/:s!ÆuSǛu4sޞr5f;z!Λ\dզəKsy A)q^c_{:Y3鱽`zK,g?s+=3#ƌثsHڳj\ysެi ?{{3OO|sƄ5x3A ǽaY͵8W"cTge;4==l^0oG1u}~Va2ƝpM<^ ys afZ7cǛ^Zlrn`)HPǎ+RȇCu!niMҹ!Euq9VK5cދ|n><{)Gz&a{OZ<-iC~ĿX5 C:O<_HJs5>=Zf9Nc;M zg=rnyy/Rr_G[#0|&ᘻw>pLVZ9O1kvtGoιfH{"g7>{h+bA}xm,F%i\\=~>7ꇞyigL{vc鸦=4i?bm@cڻk3|5yZc B^U߾jz/wyj!z99, >R<ks.'?^;>JV>pZ|6/iObm?$)1Ҟvg>'Rj%z{{*6wA8OwG!OqkZ=a]]?H3o\sS2z܈Agb}.#)Kgbe&ܜ9s\=ETg0GcmYW sur_g?yse?fM%Oy̵Huy9kmb6GWks3<3ϛ{ g^;ϸ\w9r_Ky+ş:֎ =s1gvLMsvHqp~si='g^˾ZX:wS^zɌӺܷ64GXzyx ~7̚vg}{Prx+ozVkzcv=q,O嵺1://=7s2X­z=l^327O1V  =5ŎYwIa ԏj}C9KJX$=s5%F|O,yS?}|{RFO\$?:]8ִ>X{~s)NAzu8GsOOC:rK=%o;XdiogXp[ls^]6/7ŇL5͡ALOazd=N><s0s c;71k&z@Ӽk2kZ;w$^FȞc<aOWZ/1<0W\?y>{mog{['ދ?s$o9c`{eZok /MI+ o>q35Q {J<.5v̗)NհK/sOj8Irv{i r+ΫW:.+}3}>8ٰk< \צּqkvX{}{{18&n~I=_Kkwf̓tu(Vɳz>Irl_>/g3tεW0hk0F88y8x!qmy1MF0^R.J3?4}l߽.cm.8i/>z֝5ߛzؼ8.6G=oQIY{kΖnfS |Ock/`m-z{^ cA۶Vc&=ofVҹO,7seo{؞qn;=_}þJǡ6kO)QixIj=GC|9sZFf_o|>zؼ+M<PO5g]S^h =^_b֒lײk=l^>y!Djѹx\ZCΟMq`ƛ}>r0vc1rk+'1üx1Wpǖc4넹ow]Anfwsq#.øCrй|!N"&7ŵo_kbp@wW纴9 1`>\e;i8'.ȟ^Ǜ`!We/mvR/Cũj&|d]XXOā8ۗWGMX 9z*i#ic\s5d!s5`>7|#XaJ+p ΍bg1uq=$aס טS%79߹nsX/Ø9ZFkO|~/pǮ?GTziLi=l^qc䞁9ع{;y^}EyWPKM;TqU6/sx2$FĎ/=֬_Ǥ]c_ {J\3ԵVA19XiAlF3D(|dMKkٻxkj|1XK\pg};t>3=Nkqfs6ƪ3J};!o?k7nS'"g]7C1kѹٵdLSyJYA5gu<;Wߌqþ65zy6zz9Qp^$3cMُ&aJyj;:cHp= ^5x,͹ik֚1n Xszz4\5MJyZ06sko_ u/'L׉-:=5,,Ǡ1>E79?+8OxuQs7;sFA931k:Gsog a9!ueJhΕ5 #T؋ͧ~:WZ,Ŗ<ɓα<826 /IYs|j=l^^;Kc<=Yjxo_>ߺ&uB~1>:ِcùv%.S?5~|-计nkt}̋'y9o{~%c霰,4&(f]\π %֧'X'~\'54D>3{E}S34Fg9&3|<꼴}vZgg7#ꢭX:Lzۈמ\JcP>cFlɋ-U^8_s0Օ tƱG=xއ]\{ c8x/k9f\܉Ϻ'z/6uzs'qgs\7XϮӬZc伜\3%.gg۶ƹt%_Kkb9t_99Ws- ;"v+\;n6/ꑱ >Cps^j^~qoPKBx/}'S~ǦZz7^3> 45Bc\߫ ][Ke_纩_zi4و{̃'m.kc}~y}b= =kpDFEOrbBӇߣ2[ƌXӴgT9Oq֢k|b3eGK\y\j ĢmscnF|uEob<(΁t> 3OrTp>I\!aN[c5Kثkz)vF.Q/?cε0P+΋sr$>=[|vЅ0ש~嘽,ٿ^֖i،IPKZtMBcm{ܗ; Ku\kc>v$[-\Wgz{5Γ|2Ʈ{1|GlfT[]j?o:qM=۵.א8js}5ب~9O|Y zؼ >}`\X{7\e|9\ f75?WS<\P-9麇<7r۸9Fέ޻x3gxzؼ純w[ѓ&1cWW70(YO{5?!c_wjweLgq%y5ĵwgsں}~~J43YRu{^g~kh{o9zf>kqޮ;~y5q88!gἈ9eKX9-wiY7X聮s&' X\;cek-||6I زu[9[/>[;lj)=m& OXghp&Os=l^sQYAkw-}ה#rXsɯSl\[15QcqC|[F،;ǸHIQ{ya/s,YcC}қP?+jdrv^dp>e^'N3{36vS,3׃Xđbx/kc:gṣĠlՏykv6/<˽9kZ9s|NԵcD0_9b8y2>#caػUָ0)DO#0kq\^R}=|\cl^@X3;S&6a\q4@:z\>õS>f< V?\3`5ʼn3oZa5HK%\c4s]=׹6wocCn[pyq%MۏzoO^a\.[vkq9༛j \;fM/4oq86_κ{/E][GAj<ukqmF>4k=r.+4>75z ƵX6W3j>sb=Pkvc9=m$_?3R99QdspsӥcR}kHs~9Z ɺbƧVx/ul Ce#sAϵ.P+=cOOIg>y FY?%Sx˵ܗ go{Sc,7̳zؼ]+ѫ|Nիg"Wsr})rq8wk}5無kr4iyudLÉ |5 k-mjw\wΗu581c<=Οlkt<"OWqO`R2WcܝùΥw>&_}s5 =zM{/ǒcy;/g~t2G_.9P+$yuk9jŹ~wʗr<}wEL[^qWZ71&]w1i_Z_fyTu>OךOvq]\YFIﳟ. ו<ŵ'WObG bn\z[k9N];3>xs|!HqzHI wk ,yԹכ|>8o+%gM;3綤ep3=o ϴ_cù!k(rg|>c9ړ\/{_\ξ90'ЧXCϼc]:\59VcsKyj ab qyF&)=Ss)9usE”9ȞNqMΏz]QsG#YOzal[0%Gp8'2#KƙkwxN1+=?v7yɹfb52_7fk1rk\ދ <DŽ9=>o9/\<}Mc\j^r=i̅sBm=`kv>\sƠ'1>ԔM4gcz=k߮oy5sLsC\g2~i<}C`>y>9ǘ1Nu=Hyc}Mkf<:S>cq GˮWt~Z{@RθY58G7待~7?i]s麀Spz5VkXO2i1{Zd7LOk8O8L#kKNYc;:#&3޷/9+/rc>]OsOG]yk4jÜ"m}`‡VZ"Ocٶvbt(8Ǚxnm?yِ{>?Ձ-'۳Y/3!\pܴr-Ͼ}Ls!>&>u9 ^vE_#=9ֆ}طR&o隆Lcڃk^L˟Psqy}sW+y)n{y\o>K_pn]=5yv^a2Gg;រs,~hMS{k\ӾµM15N1G3U{(󔯬m1_B7)ƉgN^J.m5͏c϶Fu3o\`j/'xo=geҜsq k5<ڭAf]mz>EI뭇W}̹Zi~G~sdliu8rTP&$%Kb_'<`~]c5aCL ݵa/or0yq`߼za~XhlxZΓ< -fƳZ9Ƶ.tvs<+]wnL{'s9|?,^ȹԵ0x5溄yKygfar{yyfݧ85=s}~hM'O{/>";>ubܼ K4:g\5i}Xk"΃[_گ2} =4g9TO~KX>_Mb r=_jk3$7>A_%Һ+Iu-Es֡i/H^M6/cn;'k1fsrL^i1eqr:$z>Aq.X 1I)swebmA̬U·/\$'F\>pRt %qqc@=׉c@^΢jmy筏cQcϟTs^e5d#ƗY+m,EPǔu>ކ5~7kɟ澱r4w=q.αVtwq'E78a"Sj>o8ԕs?=5*5۾uaI9OΝ؜f]?5\sX)^w9fO)y.'D<\;M}QY1y5{m?ػ]%/c{~1sjc5Ğo6㯇ͫ^M0ПEC͹u7@]bܓ.zu3G'fsGdL\:yzucxSZ~͵H{Gok~_orlC~YpNpgaܕ__~q~ݢs9qMsYjbA>Zs:Ĝ7\רa=B}#_\V) KXyWa97sz28I=4@<^ y!{7^bgi`K{niNn=?3zy3qs<'{KuĹK ~1&=Vb>\g+l1S?-<9y91xnkyy\YاO{ϛb8zY@W[ޜ7֜7;T1r''b|mhJqJ1I$Cwڽ^}/Ѩa?NԘY"o\\|M̑^wEVw8q^M<ӾĜg{y>{ o =Y lZӚ|{c/J}Bc=4@ߞ/{ }:[m\ bx4}?A =4Ꚁ~0Ā,1#hRcZIڶW`{Y/p=z6YZ1/[kY+/BևN8$OwBϮ/6Qzߛ~{}\aN\5]}LVZTZD;Efl_p 9/tc㸺Fu<g^w=y~ יr5i{kosktfX{a{ip8TQl߫֡y_9<{s^1Ke^s굹u5O3f;EST,t|f'{?pOy,Ƶy^dN0oOƸ+E4mﰏ{45pΏ-|ktkOkS>}u}>9q6cOr2ϓo8)ruȡx/*/Z'o諜Kӳ`٧7mOw2 !]GfnJ5xnJvK5sTW%%nM>mΞa^:+աԑBGzQ/ԗ9<:ӳa7 5%ׂAz5Mc!l:5jZqZۘR&|8nJpt:3T|MMk/kJOII=z؏K:2vSyNvM욡Qr3ӸN(eܹoskg +i\8Wok<CO9 圑8cOf a?JKXw^m;1N1KqI4\7+ _rq G`'8'We#֗qjxskזx9SNpe Z#oY'>b i=+Xs5b:;ՆuI9{3n{}wCyO썜u2|"癉qKyu7:j3q֡s 5$yCҒCר4c3I<΅u!d̬^tf&yniz=?)y*#\;N?}s{:눕=g";mbcC܃pLޓߙGoZj-C={Q{Ky2R3B osr],-iu9tߦyɫza>'isߔe?GϕX4a_#dLi +i/\R6yL֎g) Sl^g=ۤm{q|IϓKZ>{$18&]bMqy*k^`YӾ_Y=gwZ\8S\^4{s|>!դkcfގMM<9˒Ҝ!iki{ ۿu:^C\8tb'~{|ybgoGg'Sql³=.$^z1v\f`j|SI^yכNXO#yxϳ13rzZk<㜔Nгiؚ<^MxߐI|usU6.zi,i<6=gƝk|Wqz}IH|os敶Y9x-=u'R<yPcpOa"1`Cu9H^jS{RZ{==sZ WyBӷ6~WcՆӳ?}$}k_tz^Zku<[Img;Ɣ&7gI/78_{\o=ܴ׿O~uZI0RSMsg~Y{=cljo㙍^%]'=x'~r ~ΗP<877Γ51n{o=l^ǵyc9{Nƿ[=7zIˇb,;~^g9=_4{a^OykwT_aԏ=6>c-`g#vz7H>ONs-/Ιb±W3Ix^:չuaϷb?> wnr̹'zt C{~0m_9Yb}蜝M~Q1%\ͫ{Ln֩Rgqz׿Q}iE~R\tH{3Oz{\x-;o'>ga4ܙg̒gy;1dM==s,<%>69/|~Z_ |{P_|{C^ܫU^WT:OM17g^)y=^zsOs6=YG_ٟzؼП5fҺ=HyʺO|NyBvb S,/j9~|})ϵĕcړ?QE={Q-;r5ſ=?H|}si==LR8>Nv3g~=l^C/=<}1HK}ns2cx>3[LzuK筞OpCO\qcZ "|>害x|k2"ysoɻye1ֽ\ffCY^#MXMT+]Kscȹ@;oOk.8sU!qto#z޳#Hz9ը;S=^֋wޚ=eĹEޚo)FO=Oiga#z|=l^= ={1?nN:I1=]%7gz%rͳΝ{>4?KkMe?=?O3ܷ5O?5{Fz.C KgrWj0iq&<m^ZGKke[B7~W6>qd?ϋ)e]T\c?L'a%/x:X>7{}0^k=l^y8%/s/plx=;_Ǿ}}L}/= -1>zc.WoNs}t\}4ޱհ4a9<͡u114a߹n߿D5enp<4/rmoǘCZKk!>F?sqRYƂڷ-mֽyla>#;ßENSWsq~ecn_Rk{ 3忴Gq֡u̳gr5k;59s䞹2 cel-^yyOf\|fqcg=R.rN>?'I1̍^}Yc㚊X.:[OvNyy>q@҃_C&NaJ3^߬{y&nN㘑/3"ü5/i܃3y)nƘ{ichgtv{76q3g$?O9<ȴ^9r}Xz򙽽 0~׽7i\/Jٟnaaq0N'RyիOZ;? In^-6/?G_L55X0f]wskiOn%{ɓ4^g8>^[ Ob>ĚL9sr^[9}LO{BR`u2~{3_.5R7֧=\p8q9}s}x&RMnC٬㉋CSt}& SlF{.cgz<<}D3ЏD=C\xݭ>uw)` oͫ翜s}|)3j|0W-:s"K=/iP]c8f.1'6Ggys=cTC[?{=oO=^z>uK/%ꍟys?Ck~nܷ2C1L{~z_yߗ%}{=4z{/7ؿ?=9ξqubۋs}|};׉Kŗru=KKszOKu;_6藌}hxb/1y_+xKġƯǡ^^Å^_ouoErK49į_2uVrH可/IJϭzٗ<0{/еs|{+<\|uF/|I *f?_⹽X9:_?^ӷ|_K=㕇R\iu9%>Ǐϗz%~]/^wC1zq|<.^>4|%1|]|,S߇y|Kq]޽o/az%}nқ!.y=P^?˳puxqH߯Ó=txs~+s|]Nuu}^scjzxn-/^KxףoWZ?/K^ _/g^%}.f:C~8x8y//¹^94!{=1É}nޯ|uZs<8˗и9~.q9u=ޗ9j=u6}9oFO}~}6;@wߑqm<7ە=B|?S_1YO=^=4޿\a;7~ΐy0俙is>d3_ۡk JoⵧiMgFs|n-|3׌:-ssKuC^8Ը6wuw=g73BKc9n3ùm 3g=wb!_~-C+7G{{ _'~[^m;ԫE_,l/'}Ӽ|A<מ5{?!q/͉1tǟpPsg_ڵ(8Tw{ xG'>;IέY㜓k3]ߎ=aEc/&-ӳ@\Ռokֱqm#2X\]P =k2ƄڶXa q:~,ud+=.Ss罏Ը=c^y^ycMUaj9oO{8c,~_R >~W3],`jRW{szNk|>>i>>;q=]PONx_y{{>=%;kbwc~~x_Mj;NSQlg׻?3|S>2Sܾu}` mky1ol3|).->bNi}1~o'baL?s7=|pq{><=燦?7ŴsÎ_Lqn5pn44秧;aǛ6I׾ioNpqyߜbiv?4ŴD{ i״8}\g3a;6|秹5tÎ04֗' ?4>7]ova?7wiM}t~zcÎ[9COLkklu<9jӘBv~aW?|pj-v8~[l9=Î30|W>?=Icg[,rϗiqM ^ޅ86}4|l@ImF[sok;9#S?_pj:W3zw[ ejoLiv {~?00o~~4ϖߋZNaivwNc5~=W{{>8Î'pksx ϼ ki:mͷN.'~aZSޠ~_{. |߰ZaW4N1hysyBoaǗ]G{o{''uv{ǿwaWS??;OXK'||;==+ӽ#?7Nkyo 㜾sZ)nq—>mmu󻧟~kZx[kjyz۞4jva[3S??0㻆}kl{6ǶV mZ>08iy>;jҶbNg۷oW{x1L[jo߆gy~شQAގ-G= ;cO;h>17o:m/j;||=|12J>;+5hy ~ ǿcw O؝x1}sQ[.m~i?gHn9c.6k>>7L}h4aOG?w3oLsӶ'ijON96 }L|sZ}~jvaDžO8<]]x[0V79O ;ʹ;jvim3j/;_~'{ |vVg;[h56?aW|4]~τogC\y zînn` ^mzyq[~mgm}|ޘh|ۏ;5lvvZm)vnsm9qy9iW'z㵯Lsn{wMi+V~7(qMq"m9}t6vgav\nݛîjCxAx>δk3{}dž~Zhkzk-_Ω<97>?3|͛ښx.ɳVKϯ a׶nZ(G}zg;%N͓y[s:{ Z}x&mм<-gK:a3?[rkގ>[^kgawۦ?>mưқCaﭵia wvawM_v;gYlgOv?dvo|vudžy嗁M;Cxs飝_v}n3h󀖗W\j{9w>4.g|n}awr9ǧkm߅Xxc폿4vg8Mk+îmiVa̮K|Ƌ惭vmx v' x|`ۇ}f>l\9AV6<lšũOۙ»ur>/ۇgxtK7n}=>m_@9_/k?> J<mz-w|f ;o~S|ھy烟v_v/°muγvu1i۴x<|=>Og,<#ilg^va77kɏvguyg{AvRZ=˶~B󛶇yîl5G͓ymr휯a~;h7Nkh^>jor_V1glꕦ7l盇YMM{[M:3Ok&lZyn~6ng.d^jܰU|g88/|{RvgNͯ{Qi_6=87yk~i^4mO }iȾ2}ҴؾjMqİTN;~cq|no5|bުyM\y7goO ,?sq K߾+߾'~pn?}⇶O>C'~ho?}⇷O>#'~dďl~~rӏ~'~| ?-7?XTGcjD?W_/V+jzP?WOgjuX?W_j[SIjgΫ]TvS0)ż-U+**f****V*wxUU,}U.v0jx_UЫ_Tj[߽QG~jSWTjP?WjWڏU~o[j\?ڟXOVӫYj^?_P/W+j]XoU;= WU꟯V* _K W߫ڿ]{U+ _j_V8* _ WgUqUzuU*HY븰:.oǥqav\f*Džq9qw\KDžqU_p==%~jZU+\ p=.\ Th?ڏV _[j?Q>.mjTp>JϩVjYWU+jsowT+| Z{\=.d>. ߫W+Džq|\Z=. qp>.DžZV8>KǥZi4z0tzR؟'ӓU*ʂ'ŁIeI?rR=)OJ'Ii?)OJ'I}Rx']O I~ ~O 0?)O >)O ߷Z}R>)O |RO |R8jR|Rz>)O u?ڟV|gT3'ϭV؟OJ'퓿Zq8pR?)NJ'ŃIoV8)>N'ŇZqV}R}OU+>)ݟO 'ŅIR:?)OJ'>)mJ'}R؟'#P=VZX֧7V+OK맅i|Z>-lOKקiU iUiip>-OKߧiU_VUiUZia|Z>-MΧ٧٧٧U+O 4~ 0?O ~Z؟iaZ؟OjW#diaZ>~Z>-?-O p>-lOKIia|Z:?-O =-\OjiayZ>-V?+?+?+ jŅggʼnYiV8+NJgʼngmYq㬸qV8+͟jœgőY㬸qV8+>jU'ΊgFYq8pVVV8+gYa~VVUvVgYa~VpVEp>^;yyyyya~^W]v^8y=/lKya{^؞~^ yi=/ yt|T+-yaz^xyp<켴|^W>}^Xyy~OW+]W?/ ϫ>/ K[p{Qʹ((.JsEyEi.+^eip,,,/ ʽ{/ ʿ幗幗ʽ˪/KV^eitxY^.K{字字e{YZ,-^/Kre{Y,/Ke|Y8_Η,/ ʯe{Y:zYڻ,/+^^/+^旅eea|Y^^.7/ vV+|/ rep,/+^,/7/+^ΗU;]Ɨ0,|/Kee0,-^/ p,\/Kea{Y^>/ ˪/KV,UЫ 0*LJWWUWUzU{U*^U*Lc ӫUzUy{Uڽ*^WUa|U_WUa{U>{U^U***^*ϽܖoŅm[4|[umi8p[}[<-n |p-ᆳ\}[޷mymim|z[U#p-o 0=m}[8ߖo˻o |[淅ma~[޷ma|[Ʒ׷m}[Z-oks[xַmymym\=w]]{WZ+ʟ ]{Wڽ+Jw]]sJww ]sWܕ>wٻ]tW8ݕnJwӻj+|Jw]i<+M6w]]rWܕ ӻӻ+\ txWxwU+ww]i+< </ݗWj󾰼/,K_<0/?V?}x_>z_/]~}w_~}w_v}EO&/ݗW./}ԇp~(=K ʫRpP?TMjCP|(/}(>CyCaP>CӇ0~(|( 4P?&? ߇; j0~(*>N?J7JCyCCCPCaP?T P9(.z( p(>vz(_[j?PxPx(M? ( ʫŁCjCi4P~(M?oCi8P\x(.< CP|x(><>TN}(<Ņj^jT훫}K7ڻ}ڇ}GYcվڧ}ڷWjY վT}O}վϫjU%~jG_^U!jXVU~گc~]~OWM~sZ폩Tӫjv?ڟ_/UKj}ToT۪jgSUjP?QퟩUmjbڿR_oT;j^P?UϪ7jc\qa6U+\gjwjVW Y:+LgZ Ya:+LgV T+lgV~atT~j?\߯گV+|gpujگW+gW OT+g쏪U ٟZOVX ٟSϭV8jjUZ>+gvVx ?RU+\gdZ9tVjftV Ya:+Lg0'juvVjVΪ]TvWa ya=_V+poVZpj4=/= ya?/M?Y ^ y?j?Z>/?Zy~q˪?R?Zq`^:0z^X y=Gձ0KcX>=n ˣ| pjӱt:V 0Kc>c>f~,ǟVzKcq`,ݎ۱t;0_Ň80X>=?VKcw,KctKcd,ŏ1Xcqb,Nʼn0V˳X<ci{,˫_V+X؏cizxZ ~-˷_}Y/ ea,^}Y><,M/ ea,^_/Kx,/ zjŁei~Y_p_?/˗U{-jW{W+_旅,ܗp_|Y/˧zY/ e_Y/ˣ^e,/+/ eeXtYx. e,/ZrY8.KoY^voVz^,p\p\O/ UzY^ei{Y^uT+\|zYz^-lU UyU>*\W|zUʧWѫtU U*,WwW UxUJǫpU*J0\wʳW<{UyvUx UyU UṪkU^tUwU9wUxZoUu0\pU*W4*Vߪ*J쪴* WU*JUuU*lW*lWU̫uU*JʻwU:]Uݼ*W0^oJyU8ʷW^Ua*^ΫUa*lWr*lW]nW*GQi=*l ۣ{T=*Za}Tfʳ p>*>*ʻJGݣQaTn ?* p?rQT98pTGQi>**U>}Tz>*-Gߣ?* ?*JGGŁ*ʻ GQGŃQQXxpT ꮣQrQipT:?**U?*>*>*> GQpT|8*>GʼnQᨼrQ4T~T|8Tt?oZuG~֯5?_?rg#ol~N_#k|z_^|nM.>uܗ5k|ZOnЃ®g9P˯ڟצ?GGk??駦?ݷ7Oohoڏ6x?k=psych/data/withinBetween.rda0000644000176200001440000000051612006524020015606 0ustar liggesusers]SN0t/hpɮatM^i!raOea 1HmXMS .-i/ӒP *ˢ|%5 -(h[gdžNDqq8{:X;kpg 1X'=Iz!pp1pJ?Id_Y3h3%seNv4Ɯs~|ߘs`nAߘs@c[[sS}zʡknuΡ{z=ALVtXG2m~N?0 b|Ncq Ppsych/data/Harman.political.rda0000644000176200001440000000064312053533270016171 0ustar liggesusersRO1 \Đ茌8s \a`9jpupqrqr`kjep.gmV֍XFqqL2(%Ln6]k0f;(Rn.wK(4|0VUy#i5 )+ +5/_I|W4>TEyƛwpfN<a<GO7u~N¼g P/ "<|Us iP u@'?IN\U+! HR@bO4.pnO_Rc>Ŏ voO葶KNl0Egjc(nzǤ{ ]=%v@DW!Vf,G_Spsych/data/Schmid.rda0000644000176200001440000000515411243601416014213 0ustar liggesusersXkp΍LX-ڊVmA7VJHbR[$4 ل PR UF.BeJ6=$n.@:w<'4'43={/{vO92qfbTTTLTk-bQQIWUՋ5 UH3z[FkdWHb)y—$}.ٽfò>TMfȒO_iO%/dckeÓuůxR{EIwJ5O'Ɂq7{KKʇE/xi~jEM& S qٮ >Y,8wrddHvͯ^5#N-oO.0ߢEMaZ{ lO9Sy{,٤j_xi~nijX?(JseGu#|̏u#ā.aiו-Y>_xi~vXri|hlٖ0WZfxF>0/I/Vɚ>fgVɳzY:`|]?JdOrٷrO;K%vgyO~TwlO}a "ψF|?'W%WR W6T XTO$X9N,KӟH1{uY2f5Ƭƚ8U쌔{)=ˑ!\P;(~ߊߎNoo|X+~og}[⽿o߈|[V|߿IV|}|'zYNy 9Q9x#/w ֟堺~ϭ2]IRO,1Ͽj/3|ۼ>3H4LJ8"Sj@6r다t͊fH]/ XC{U?-~՜ZǥJ~pTy-KR{׉oJtɇq'WeΗjn6;^_zFJV)AeYÔ2E%:'B<Gp V/p;X/GAOj9S*t $U $~I~.ăqF GO[U"+WP֫t@vJ`_8!)ѢxvSu!xg &u!ZQLY>v׺Rm C|_ q%?X8BGu]\c:n2R}K䇩 y8ڠ/zۅr7|*TMA3B7+vMmL^/>r - 91@`>K2nօx00t\a|}A`ݹOq0Mk5lJ YB_T|[*uzJ@O ?P~8"PuGk*iTa,=%uȗǹB]ߨ[fx ~.rS+aB&:ug[GhGuZ&wTrJ,c,UBǾp!x: P!83_ gԥg^sB7xrW}3xh@O{qĠ`ps]B8.I\`$5iZKmOA6ktY&>W/p6o_j4eiSz$]X!oAoE83_MiggcxԊAo cALϑ?ȃx~Ͻ^`nu|@NO;^Q7ؿ#Bq#Sou>b=+^/Kq@؇֗83_7J@}NR/K/'1oN88Iľ1>}%̗~'|9C곙_WG3yŸ3}(WʤoXӳճݳ%kޡ!psych/data/Holzinger.9.rda0000644000176200001440000000110211541200477015104 0ustar liggesusers r0b```b`@& `bN ϩKO-ҳ?0/윢:Z]P('c$}{Ri)M`y?kXh̻kUg+;i$m'^y̰Wr>f,xb~烄3WKKfEafʛ?ڟ,3}4'3ٿ[iB`P|59u+Vs?~~yO^us'.%nZ\ xs?~~ylzn͉{SoXs?~~yNtnt}  Xs?~~yѝ X~0jZReNR DSTZ d C 2pe%CɥI``ɜT`N s `<~y~QJnjbYs&ħ OON,(Y5psych/data/Thurstone.33G.rda0000644000176200001440000000072114265626743015346 0ustar liggesusers r0b```b`a@& yC2JKR݁*fUER"wW_$tX?9w߯R<yEr#~yWϯ:;%볦+"*ipW0OkZk{ќYفv~|n?L?̼[m^_>?~XW;~!{5߷NYw?~~OxǞIϽ6f&4W.j(a;:(d(5Upk?_%xs?~~y`A! R~0fél*wt\ ?xCa{\'mx~0P+sJf.(͂3X0Fpsych/data/Dwyer.rda0000644000176200001440000000052511544424006014075 0ustar liggesusers r0b```b`@& `bN fu)L-9QC/nƾ:j\sI?In?l 0}0swYLXb*F냹!*rVN}̹ *گ'g@,0wB郛j]j pwB |P>,<ԝ0`\Ұx'4\`>9h9%3H(*KM-jALape gYpegY s+psych/data/Harman.8.rda0000644000176200001440000000077011621772274014372 0ustar liggesusers r0b```b`@& `bN H,Mӳ`+`p߷y?Fa5o'fmԁf!W~fyj#]E^3獡ˊ+_o$^]}/3yg d͡~}0}0s^ͷUnG.ڟ c\HCu"pw郙syN7W@K+_[̝0``Ic K?;ayM|Nl',\`fL̜ pN}0}PssJf.'!`r`$TZ d C @Ղ֡ά[;ߘH}<^_MLìσ Wr@-'7/7  ᾧ}I:?峔÷ȶ>OfD^Zm7M'|oyOztO# O.ƞdž-{w-m[ Oyx)\(Gg/`#~h 4{;<XW=n>={\=9GGg|? H?\n/ /Ҫ=pԹ;Qqn7wa~>*r;ߣzyP0+ԾY0ɥ' ^V\gM宾( Vokf뇙wsπTW\M|f /9Om0aa潘q@%{KWvb=Yu?o?y[1 34Hc36(_giԁfOA~~00,e[A#_{Xs?~~y\\b J_xAs?~~y<!x~0fShf,a?0a%U\ N `!N4%@%y@0TXT>(5-(!Js*"y%y%nX\Tq q9䤖dAEN//(/,Ibn(a%[apsych/data/Tucker.rda0000644000176200001440000000075611312217005014236 0ustar liggesusers r0b```b`RL@& `bN f )MN-b``h ``pW};|a,KOn#]}iP-5@׭r;؟)֮7fKW~mnP7``yj#s/f/}9tRbdOV~V4y&y03fi.N3o'fmP6By3쟂7>03f׹l ?(ڠpd2+Pϣΰ 90s.6n, `3:;a=73,|`fܧM?p gXsh5/17 0sij5E0L#c44@fnrNb1~ WJbI^Zih9d L3<psych/man/0000755000176200001440000000000014551520565012163 5ustar liggesuserspsych/man/reliability.Rd0000644000176200001440000002641314524267366015000 0ustar liggesusers\name{reliability} \alias{reliability} \alias{plot.reliability} \title{Reports 7 different estimates of scale reliabity including alpha, omega, split half} \description{ Revelle and Condon, (2019) reviewed the problem of reliability in a tutorial meant to useful to the theoretician as well as the practitioner. Although there are a number of functions in psych for estimating reliability of single scales, (e.g. \code{\link{alpha}} and \code{\link{omega}}), for split half reliability \code{\link{splitHalf}} or for finding test-retest reliability \code{\link{testRetest}} or multilevel reliability \code{\link{mlr}}, the \code{\link{reliability}} function combines several of these functions to report these recommended measures for multiple scales. To quote from Revelle and Condon (2019) ``Reliability is a fundamental problem for measurement in all of science for `(a)ll measurement is befuddled by error' (p 294 McNemar, 1946). Perhaps because psychological measures are more befuddled than those of the other natural sciences, psychologists have long studied the problem of reliability. ``Issues of reliability are fundamental to understanding how correlations between observed variables are (attenuated) underestimates of the relationships between the underlying constructs, how observed estimates of a person's score are biased estimates of their latent score, and how to estimate the confidence intervals around any particular measurement. Understanding the many ways to estimate reliability as well as the ways to use these estimates allows one to better assess individuals and to evaluate selection and prediction techniques. This is not just a problem for measurement specialists but for all who want to make theoretical inferences from observed data. `` It is no longer acceptable to report one coefficient that is only correct if all items are exactly equally good measures of a construct. Researchers are encouraged to report at least two coefficients (e.g., omega_h and omega_t) and then discuss why each is appropriate for the inference that is being made. They are discouraged from reporting just alpha unless they can justify the assumptions implicit in using it (i.e., tau equivalence and unidimensionality)." Here we make it easy to do so. Although the \code{\link{alpha}} and \code{\link{omega}} functions will find reliability estimates for a single scale, and \code{\link{scoreItems}} and \code{\link{scoreOverlap}} will find alpha for multiple scales, it sometimes is convenient to call \code{\link{omega}} and \code{\link{splitHalf}} for multiple scales. \code{\link{reliability}} takes a keys list (suitable for \code{\link{scoreItems}} ) and then finds hierarchical and total omega as well as split half reliabilities for each separate scale. \code{\link{plot.reliability}} takes the output of \code{\link{reliability}} and displays it as a dot chart showing the values of both omegas as well as alpha and the distributions of split half reliabilities. } \usage{ reliability(keys=NULL, items, nfactors = 2, split = TRUE, raw=TRUE, plot=FALSE,hist=FALSE, n.sample=10000,brute=FALSE, check.keys=TRUE, covar=FALSE) \method{plot}{reliability}(x,omega=TRUE,alpha=TRUE,split=TRUE,uni=TRUE,add=FALSE, xlim=NULL, main=NULL,...) } \arguments{ \item{keys}{A list of items to be scored (may be taken from a keys.list for \code{\link{scoreItems}}. This list may contain one or more keys. ) If keys are not specified, then all the items are used. } \item{items}{The matrix or data.frame of items to be scored. Can be substantially greater than the items included in keys. For just those items in each key are scored. } \item{nfactors}{Omega is not well defined for two factors, but for small sets of items, two is the better choice. For larger number of items per scale, 3 is probably preferable. } \item{split}{By default, find splitHalf reliabilities as well as the omega statistics. When plotting, split implies that raw was called in reliability.} \item{plot}{By default, suppress the omega plots for each scale.} \item{raw}{If TRUE, return a list of all the possible splits (up to n.samples). Useful for graphic display. } \item{hist}{If TRUE then split and raw are forced to TRUE and the histograms of the split half values are drawn. (Otherwise, just return the values for later plotting)} \item{n.sample}{Normally defaults to 10,000. This means that for up to 16 item tests, all possible splits are found. choose(n,n/2)/2 explodes above that, eg. for all splits of the epi E scale requires 1,352,078 splits or 23.4 seconds on a MacBook Pro with a 2.4GHZ 8 core Intel Core I9. Can be done, but do you want to do so? } \item{brute}{Do all possible splits rather than sampling. (see \code{\link{splitHalf}} for details)} \item{check.keys}{By default, check that the keys are signe in the direction of the loadings on the first PCA. } \item{covar}{Should we use covariances rather than correlations?} \item{x}{The object returned from reliability} \item{omega}{Add in the values of omega_h and omega_t} \item{uni}{Show the unidimensionality value from \code{\link{unidim}}. } \item{alpha}{Add the value of alpha} \item{add}{Allows us to merge this figure with other ones} \item{main}{Defaults to "Split half distributions + omega, alpha"} \item{xlim}{The xlim of the plot} \item{...}{Other graphical parameters} } \details{\code{\link{reliability}} is basically just a wrapper for \code{\link{omegah}}, \code{\link{unidim}} and \code{\link{splitHalf}}. Revelle and Condon (2019) recommended reporting at least three reliability statistics for any scale, here we make it easy to do. If the hist option is set to true, histgrams and density plots of the split half values for each test are also shown. The output from \code{\link{reliability}} can be passed to \code{\link{error.dots}} to show the reliability statistics for multiple scales graphically. It is however more useful to just call the \code{\link{plot.reliability}} function to show the basic information. For detailed analysis of any one scale, it is recommended to do a complete \code{\link{omega}} analysis, perhaps combined with a \code{\link{splitHalf}} analysis. The \code{\link{reliability}} function is just meant for the case where the user has multiple scales (perhaps scored using \code{\link{scoreItems}}) and then wants to get more complete reliability information for all of the scales. Following a suggestion, the ability to not bother with keys and just do omega and split half and draw the results has been added. Either specify that keys=NULL, or just specify the items to use. (See the first example.) \code{\link{plot.reliability}} provides a dot chart summary of the distributions of the split half values, as well as the estimates of omega and alpha and unidimensionality. It can also be called by just issuing a plot command. } \value{ \item{omega_h}{Omega_h is the (model based) hierarchical estimate of the general factor saturation of a scale.} \item{alpha}{The conventional alpha statistic (which is not model based)} \item{omega.tot}{A model based estimate of the total reliability of a scale} \item{Uni}{An experimental estimate of unidimensionality (from unidim)} \item{r.fit}{How well does the average r of the correlations reproduce the matrix?} \item{f.fit}{How well does a single factor reproduce the correlation matrix} \item{max.split}{The greatest split half reliability of the scale. Found by finding all possible splits (if this is < 10,000) or sampled from 10,000 possible splits.} \item{min.split}{The lowest split half reliability of the scale. An estimate of beta (see \code{\link{iclust}}). } \item{mean.r}{The average correlation of the items in the scale} \item{med.r}{The median correlation of the items. If this differs from the mean, that is a sign of poor scale.} \item{splits}{A list of the split half values for all possible splits.} } \references{ Revelle, William. (in prep) An introduction to psychometric theory with applications in R. Springer. Working draft available at \url{https://personality-project.org/r/book/} Revelle, W. and Condon, D.M. (2019) Reliability from alpha to omega: A tutorial. Psychological Assessment, 31, 12, 1395-1411. https://doi.org/10.1037/pas0000754. \url{https://osf.io/preprints/psyarxiv/2y3w9} Preprint available from PsyArxiv Revelle, W. and Condon, D.M (2023) Using undim rather than omega in estimating undimensionality. Working draft available at \url{https://personality-project.org/revelle/publications/rc.23.pdf} } \author{ William Revelle } \note{For much more information on reliability, see the help pages for \code{\link{omega}} as well as the Revelle and Condon (2019) tutorial or the Revelle (in prep) chapter on reliability. For some rare cases in some simulations, check.keys=FALSE gets a better minimum split half. } \seealso{See Also \code{\link{omega}} to find more complete output for the various omega analyses,\code{\link{splitHalf}} to show more detail on split half estimates, \code{\link{scoreItems}} to find scores on multiple scales using unit weights, \code{\link{testRetest}} to find test retest reliabilities, \code{\link{mlr}} to find multilevel reliabilities. \code{\link{predicted.validity}} will call \code{\link{reliability}} and \code{\link{item.validity}} to use the average r information to find the asymptotic validity of a set of scales for a set of criteria. } \examples{ reliability(psychTools::ability) #an example of finding reliability for all items rel <- reliability(psychTools::ability.keys,psychTools::ability) #use keys to select scales R <- cor(psychTools::ability,use="pairwise") #find the correlations to test rel.R <- reliability(psychTools::ability.keys,R) #this should be the same as rel plot(rel.R) #versus all and subsets all.equal(rel$result.df,rel.R$result.df ) #should be TRUE reliability(psychTools::bfi.keys,psychTools::bfi) #reliability when items are keyed negative \dontrun{ #this takes a few seconds but shows nice graphic displays spi.rel <- reliability(psychTools::spi.keys,psychTools::spi,hist=TRUE) #graph them spi.rel #show them #plot them using plot.reliability plot(spi.rel) #draw the density distrbutions plot(spi.rel,split=FALSE) #don't draw the split half density distribution plot(spi.rel,omega=FALSE) # don't add omega values to the diagram #or do this without the densities #plot the first three values in a dot chart error.dots(spi.rel$result.df[,1],sort=FALSE, xlim=c(.3,1),head=16,tail=16, main = expression(paste(omega[h], ~~~~ alpha,~~~~ omega[t]))) #plot the omega_h values error.dots(spi.rel$result.df[,2],sort=FALSE,pch=2,xlim=c(.3,1),head=16,tail=16, main="",labels="",add=TRUE)#add the alpha values error.dots(spi.rel$result.df[,3],sort=FALSE, xlim=c(.3,1),head=16,tail=16, pch=3,labels="", main="",add=TRUE) #and the omega_t values #or, show the smallest and greatest split half, as well as alpha error.dots(spi.rel$result.df[,4],sort=FALSE, xlim=c(.3,1),head=16,tail=16, main = expression(paste(beta, ~~~~ alpha,~~~~ glb))) error.dots(spi.rel$result.df[,5],sort=FALSE,pch=5,xlim=c(.3,1),head=16,tail=16, main="",labels="",add=TRUE)#add the GLB values error.dots(spi.rel$result.df[,2],sort=FALSE,pch=2,xlim=c(.3,1),head=16,tail=16, main="",labels="",add=TRUE)#add the alpha values } } \keyword{ multivariate } \keyword{ models } psych/man/VSS.parallel.Rd0000644000176200001440000000207211431543251014710 0ustar liggesusers\name{VSS.parallel} \alias{VSS.parallel} \title{Compare real and random VSS solutions} \description{Another useful test for the number of factors is when the eigen values of a random matrix are greater than the eigen values of a a real matrix. Here we show VSS solutions to random data. A better test is probably \code{\link{fa.parallel}}. } \usage{ VSS.parallel(ncases, nvariables,scree=FALSE,rotate="none") } %- maybe also 'usage' for other objects documented here. \arguments{ \item{ncases}{Number of simulated cases } \item{nvariables}{ number of simulated variables } \item{scree}{Show a scree plot for random data -- see \code{\link{omega}}} \item{rotate}{rotate="none" or rotate="varimax"} } \value{VSS like output to be plotted by VSS.plot } \references{Very Simple Structure (VSS)} \author{ William Revelle} \seealso{ \code{\link{fa.parallel}}, \code{\link{VSS.plot}}, \code{\link{ICLUST}}, \code{\link{omega}} } \examples{ #VSS.plot(VSS.parallel(200,24)) } \keyword{ models }% at least one, from doc/KEYWORDS \keyword{ models }% __ONLY ONE__ keyword per line psych/man/omega.graph.Rd0000755000176200001440000001202614537105740014644 0ustar liggesusers\name{omega.graph} \alias{omega.diagram} \alias{omega.graph} \title{Graph hierarchical factor structures } \description{Hierarchical factor structures represent the correlations between variables in terms of a smaller set of correlated factors which themselves can be represented by a higher order factor. Two alternative solutions to such structures are found by the \code{\link{omega}} function. The correlated factors solutions represents the effect of the higher level, general factor, through its effect on the correlated factors. The other representation makes use of the Schmid Leiman transformation to find the direct effect of the general factor upon the original variables as well as the effect of orthogonal residual group factors upon the items. Graphic presentations of these two alternatives are helpful in understanding the structure. omega.graph and omega.diagram draw both such structures. Graphs are drawn directly onto the graphics window or expressed in ``dot" commands for conversion to graphics using implementations of Graphviz (if using omega.graph). Using Graphviz allows the user to clean up the Rgraphviz output. However, if Graphviz and Rgraphviz are not available, use omega.diagram. See the other structural diagramming functions, \code{\link{fa.diagram}} and \code{\link{structure.diagram}}. In addition } \usage{ omega.diagram(om.results,sl=TRUE,sort=TRUE,labels=NULL,flabels=NULL,cut=.2, gcut=.2,simple=TRUE, errors=FALSE, digits=1,e.size=.1,rsize=.15,side=3, main=NULL,cex=NULL,color.lines=TRUE,marg=c(.5,.5,1.5,.5),adj=2, ...) omega.graph(om.results, out.file = NULL, sl = TRUE, labels = NULL, size = c(8, 6), node.font = c("Helvetica", 14), edge.font = c("Helvetica", 10), rank.direction=c("RL","TB","LR","BT"), digits = 1, title = "Omega", ...) } \arguments{ \item{om.results}{The output from the omega function } \item{out.file}{ Optional output file for off line analysis using Graphviz } \item{sl}{ Orthogonal clusters using the Schmid-Leiman transform (sl=TRUE) or oblique clusters } \item{labels}{ variable labels } \item{flabels}{Labels for the factors (not counting g)} \item{size}{size of graphics window } \item{node.font}{ What font to use for the items} \item{edge.font}{What font to use for the edge labels } \item{rank.direction}{ Defaults to left to right } \item{digits}{ Precision of labels } \item{cex}{control font size} \item{color.lines}{Use black for positive, red for negative} \item{marg}{The margins for the figure are set to be wider than normal by default} \item{adj}{Adjust the location of the factor loadings to vary as factor mod 4 + 1} \item{title}{ Figure title } \item{main}{ main figure caption } \item{\dots}{Other options to pass into the graphics packages } \item{e.size}{the size to draw the ellipses for the factors. This is scaled by the number of variables.} \item{cut}{Minimum path coefficient to draw} \item{gcut}{Minimum general factor path to draw} \item{simple}{draw just one path per item} \item{sort}{sort the solution before making the diagram} \item{side}{on which side should errors be drawn?} \item{errors}{show the error estimates} \item{rsize}{size of the rectangles} } \details{While omega.graph requires the Rgraphviz package, omega.diagram does not. \code{\link{omega}} requires the GPArotation package. } \value{ \item{clust.graph }{A graph object} \item{sem}{A matrix suitable to be run throughe the sem function in the sem package.} } \references{ \url{https://personality-project.org/r/r.omega.html} \cr Revelle, W. (in preparation) An Introduction to Psychometric Theory with applications in R. \url{https://personality-project.org/r/book/} Revelle, W. (1979). Hierarchical cluster analysis and the internal structure of tests. Multivariate Behavioral Research, 14, 57-74. (\url{https://personality-project.org/revelle/publications/iclust.pdf}) Zinbarg, R.E., Revelle, W., Yovel, I., & Li. W. (2005). Cronbach's Alpha, Revelle's Beta, McDonald's Omega: Their relations with each and two alternative conceptualizations of reliability. Psychometrika. 70, 123-133. \url{https://personality-project.org/revelle/publications/zinbarg.revelle.pmet.05.pdf} Zinbarg, R., Yovel, I., Revelle, W. & McDonald, R. (2006). Estimating generalizability to a universe of indicators that all have one attribute in common: A comparison of estimators for omega. Applied Psychological Measurement, 30, 121-144. DOI: 10.1177/0146621605278814 } \author{ \url{https://personality-project.org/revelle.html} \cr Maintainer: William Revelle \email{ revelle@northwestern.edu } } \note{ omega.graph requires rgraphviz. -- omega requires GPArotation } \seealso{ \code{\link{omega}}, \code{\link{make.hierarchical}}, \code{\link{ICLUST.rgraph}} } \examples{ #24 mental tests from Holzinger-Swineford-Harman if(require(GPArotation) ) {om24 <- omega(Harman74.cor$cov,4) } #run omega # #example hierarchical structure from Jensen and Weng if(require(GPArotation) ) {jen.omega <- omega(make.hierarchical())} } \keyword{ multivariate } psych/man/structure.list.Rd0000644000176200001440000000314611213100046015444 0ustar liggesusers\name{structure.list} \alias{structure.list} \alias{phi.list} \title{Create factor model matrices from an input list} \description{When creating a structural diagram or a structural model, it is convenient to not have to specify all of the zero loadings in a structural matrix. structure.list converts list input into a design matrix. phi.list does the same for a correlation matrix. Factors with NULL values are filled with 0s. } \usage{ structure.list(nvars, f.list,f=NULL, f.labels = NULL, item.labels = NULL) phi.list(nf,f.list, f.labels = NULL) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{nvars}{Number of variables in the design matrix } \item{f.list}{A list of items included in each factor (for structure.list, or the factors that correlate with the specified factor for phi.list} \item{f}{prefix for parameters -- needed in case of creating an X set and a Y set} \item{f.labels}{Names for the factors } \item{item.labels}{Item labels } \item{nf}{Number of factors in the phi matrix} } \details{This is almost self explanatory. See the examples. } \value{ \item{factor.matrix}{a matrix of factor loadings to model} } \seealso{ \code{\link{structure.graph}} for drawing it, or \code{\link{sim.structure}} for creating this data structure. } \examples{ fx <- structure.list(9,list(F1=c(1,2,3),F2=c(4,5,6),F3=c(7,8,9))) fy <- structure.list(3,list(Y=c(1,2,3)),"Y") phi <- phi.list(4,list(F1=c(4),F2=c(1,4),F3=c(2),F4=c(1,2,3))) fx phi fy } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{multivariate } \keyword{models }psych/man/simulation.circ.Rd0000644000176200001440000001153114305222626015550 0ustar liggesusers\name{simulation.circ} \alias{simulation.circ} \alias{circ.simulation} \alias{circ.sim.plot} \title{ Simulations of circumplex and simple structure} \description{Rotations of factor analysis and principal components analysis solutions typically try to represent correlation matrices as simple structured. An alternative structure, appealing to some, is a circumplex structure where the variables are uniformly spaced on the perimeter of a circle in a two dimensional space. Generating these data is straightforward, and is useful for exploring alternative solutions to affect and personality structure. } \usage{ simulation.circ(samplesize=c(100,200,400,800), numberofvariables=c(16,32,48,72)) circ.sim.plot(x.df) } \arguments{ \item{samplesize}{a vector of sample sizes to simulate } \item{numberofvariables}{vector of the number of variables to simulate } \item{x.df}{A data frame resulting from \code{\link{simulation.circ}} } } \details{``A common model for representing psychological data is simple structure (Thurstone, 1947). According to one common interpretation, data are simple structured when items or scales have non-zero factor loadings on one and only one factor (Revelle & Rocklin, 1979). Despite the commonplace application of simple structure, some psychological models are defined by a lack of simple structure. Circumplexes (Guttman, 1954) are one kind of model in which simple structure is lacking. ``A number of elementary requirements can be teased out of the idea of circumplex structure. First, circumplex structure implies minimally that variables are interrelated; random noise does not a circumplex make. Second, circumplex structure implies that the domain in question is optimally represented by two and only two dimensions. Third, circumplex structure implies that variables do not group or clump along the two axes, as in simple structure, but rather that there are always interstitial variables between any orthogonal pair of axes (Saucier, 1992). In the ideal case, this quality will be reflected in equal spacing of variables along the circumference of the circle (Gurtman, 1994; Wiggins, Steiger, & Gaelick, 1981). Fourth, circumplex structure implies that variables have a constant radius from the center of the circle, which implies that all variables have equal communality on the two circumplex dimensions (Fisher, 1997; Gurtman, 1994). Fifth, circumplex structure implies that all rotations are equally good representations of the domain (Conte & Plutchik, 1981; Larsen & Diener, 1992)." (Acton and Revelle, 2004) Acton and Revelle reviewed the effectiveness of 10 tests of circumplex structure and found that four did a particularly good job of discriminating circumplex structure from simple structure, or circumplexes from ellipsoidal structures. Unfortunately, their work was done in Pascal and is not easily available. Here we release R code to do the four most useful tests: The Gap test of equal spacing Fisher's test of equality of axes A test of indifference to Rotation A test of equal Variance of squared factor loadings across arbitrary rotations. Included in this set of functions are simple procedure to generate circumplex structured or simple structured data, the four test statistics, and a simple simulation showing the effectiveness of the four procedures. \code{\link{circ.sim.plot}} compares the four tests for circumplex, ellipsoid and simple structure data as function of the number of variables and the sample size. What one can see from this plot is that although no one test is sufficient to discriminate these alternative structures, the set of four tests does a very good job of doing so. When testing a particular data set for structure, comparing the results of all four tests to the simulated data will give a good indication of the structural properties of the data. } \value{A data.frame with simulation results for circumplex, ellipsoid, and simple structure data sets for each of the four tests.} \references{ Acton, G. S. and Revelle, W. (2004) Evaluation of Ten Psychometric Criteria for Circumplex Structure. Methods of Psychological Research Online, Vol. 9, No. 1 (formerly at https://www.dgps.de/fachgruppen/methoden/mpr-online/issue22/mpr110_10.pdf and now at \url{https://personality-project.org/revelle/publications/acton.revelle.mpr110_10.pdf}. } \author{ William Revelle} \note{The simulations default values are for sample sizes of 100, 200, 400, and 800 cases, with 16, 32, 48 and 72 items. } \seealso{See also \code{\link{circ.tests}}, \code{\link{sim.circ}}, \code{\link{sim.structural}}, \code{\link{sim.hierarchical}}, \code{\link{simCor}}, \code{\link{sim}}} \examples{ #not run demo <- simulation.circ() boxplot(demo[3:14]) title("4 tests of Circumplex Structure",sub="Circumplex, Ellipsoid, Simple Structure") circ.sim.plot(demo[3:14]) #compare these results to real data } \keyword{multivariate} \keyword{datagen} psych/man/cor2dist.Rd0000644000176200001440000000162613256544623014212 0ustar liggesusers\name{cor2dist} \alias{cor2dist} \title{Convert correlations to distances (necessary to do multidimensional scaling of correlation data)} \description{A minor helper function to convert correlations (ranging from -1 to 1) to distances (ranging from 0 to 2). \eqn{d = \sqrt{(2(1-r))}}. } \usage{ cor2dist(x) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{If square, then assumed to be a correlation matrix, otherwise the correlations are found first. } } \value{dist: a square matrix of distances. } \references{ Revelle, William. (in prep) An introduction to psychometric theory with applications in R. Springer. Working draft available at \url{https://personality-project.org/r/book/} } \author{William Revelle} \note{ For an example of doing multidimensional scaling on data that are normally factored, see Revelle (in prep) } \keyword{ multivariate } \keyword{ models} psych/man/pairs.panels.Rd0000644000176200001440000001557714407602232015060 0ustar liggesusers\name{pairs.panels} \alias{pairs.panels} \alias{panel.cor} \alias{panel.cor.scale} \alias{panel.hist} \alias{panel.lm} \alias{panel.lm.ellipse} \alias{panel.hist.density} \alias{panel.ellipse} \alias{panel.smoother} \title{SPLOM, histograms and correlations for a data matrix} \description{Adapted from the help page for pairs, pairs.panels shows a scatter plot of matrices (SPLOM), with bivariate scatter plots below the diagonal, histograms on the diagonal, and the Pearson correlation above the diagonal. Useful for descriptive statistics of small data sets. If lm=TRUE, linear regression fits are shown for both y by x and x by y. Correlation ellipses are also shown. Points may be given different colors depending upon some grouping variable. Robust fitting is done using lowess or loess regression. Confidence intervals of either the lm or loess are drawn if requested. } \usage{ \method{pairs}{panels}(x, smooth = TRUE, scale = FALSE, density=TRUE,ellipses=TRUE, digits = 2,method="pearson", pch = 20, lm=FALSE,cor=TRUE,jiggle=FALSE,factor=2, hist.col="cyan",show.points=TRUE,rug=TRUE, breaks = "Sturges",cex.cor=1,wt=NULL, smoother=FALSE,stars=FALSE,ci=FALSE,alpha=.05, ...) } \arguments{ \item{x}{a data.frame or matrix} \item{smooth}{TRUE draws loess smooths } \item{scale}{ TRUE scales the correlation font by the size of the absolute correlation. } \item{density}{TRUE shows the density plots as well as histograms} \item{ellipses}{TRUE draws correlation ellipses} \item{lm}{Plot the linear fit rather than the LOESS smoothed fits.} \item{digits}{ the number of digits to show} \item{method}{method parameter for the correlation ("pearson","spearman","kendall")} \item{pch}{The plot character (defaults to 20 which is a '.').} \item{cor}{If plotting regressions, should correlations be reported?} \item{jiggle}{Should the points be jittered before plotting?} \item{factor}{factor for jittering (1-5)} \item{hist.col}{What color should the histogram on the diagonal be?} \item{show.points}{If FALSE, do not show the data points, just the data ellipses and smoothed functions} \item{rug}{if TRUE (default) draw a rug under the histogram, if FALSE, don't draw the rug} \item{breaks}{If specified, allows control for the number of breaks in the histogram (see the hist function)} \item{cex.cor}{If this is specified, this will change the size of the text in the correlations. this allows one to also change the size of the points in the plot by specifying the normal cex values. If just specifying cex, it will change the character size, if cex.cor is specified, then cex will function to change the point size.} \item{wt}{If specified, then weight the correlations by a weights matrix (see note for some comments)} \item{smoother}{If TRUE, then smooth.scatter the data points -- slow but pretty with lots of subjects } \item{stars}{For those people who like to show the significance of correlations by using magic astricks, set stars=TRUE} \item{ci}{Draw confidence intervals for the linear model or for the loess fit, defaults to ci=FALSE. If confidence intervals are not drawn, the fitting function is lowess.} \item{alpha}{The alpha level for the confidence regions, defaults to .05} \item{\dots}{other options for pairs } } \details{Shamelessly adapted from the pairs help page. Uses panel.cor, panel.cor.scale, and panel.hist, all taken from the help pages for pairs. Also adapts the ellipse function from John Fox's car package. \code{\link{pairs.panels}} is most useful when the number of variables to plot is less than about 6-10. It is particularly useful for an initial overview of the data. To show different groups with different colors, use a plot character (pch) between 21 and 25 and then set the background color to vary by group. (See the second example). When plotting more than about 10 variables, it is useful to set the gap parameter to something less than 1 (e.g., 0). Alternatively, consider using \code{\link{cor.plot}} In addition, when plotting more than about 100-200 cases, it is useful to set the plotting character to be a point. (pch=".") Sometimes it useful to draw the correlation ellipses and best fitting loess without the points. (points.false=TRUE). } \value{A scatter plot matrix (SPLOM) is drawn in the graphic window. The lower off diagonal draws scatter plots, the diagonal histograms, the upper off diagonal reports the Pearson correlation (with pairwise deletion). If lm=TRUE, then the scatter plots are drawn above and below the diagonal, each with a linear regression fit. Useful to show the difference between regression lines. } \seealso{ \code{\link{pairs}} which is the base from which pairs.panels is derived, \code{\link{cor.plot}} to do a heat map of correlations, and \code{\link{scatter.hist}} to draw a single correlation plot with histograms and best fitted lines. To find the probability "significance" of the correlations using normal theory, use \code{\link{corr.test}}. To find confidence intervals using boot strapping procedures, use \code{\link{cor.ci}}. To graphically show confidence intervals, see \code{\link{cor.plot.upperLowerCi}}. } \note{If the data are either categorical or character, this is flagged with an astrix for the variable name. If character, they are changed to factors before plotting. The wt parameter allows for scatter plots of the raw data while showing the weighted correlation matrix (found by using \code{\link{cor.wt}}). The current implementation uses the first two columns of the weights matrix for all analyses. This is useful, but not perfect. The use of this option would be to plot the means from a \code{\link{statsBy}} analysis and then display the weighted correlations by specifying the means and ns from the statsBy run. See the final (not run) example.} \examples{ pairs.panels(attitude) #see the graphics window data(iris) pairs.panels(iris[1:4],bg=c("red","yellow","blue")[iris$Species], pch=21,main="Fisher Iris data by Species") #to show color grouping pairs.panels(iris[1:4],bg=c("red","yellow","blue")[iris$Species], pch=21+as.numeric(iris$Species),main="Fisher Iris data by Species",hist.col="red") #to show changing the diagonal #to show 'significance' pairs.panels(iris[1:4],bg=c("red","yellow","blue")[iris$Species], pch=21+as.numeric(iris$Species),main="Fisher Iris data by Species",hist.col="red",stars=TRUE) #demonstrate not showing the data points data(sat.act) pairs.panels(sat.act,show.points=FALSE) #better yet is to show the points as a period pairs.panels(sat.act,pch=".") #show many variables with 0 gap between scatterplots # data(bfi) # pairs.panels(psychTools::bfi,show.points=FALSE,gap=0) #plot raw data points and then the weighted correlations. #output from statsBy sb <- statsBy(sat.act,"education") pairs.panels(sb$mean,wt=sb$n) #report the weighted correlations #compare with pairs.panels(sb$mean) #unweighted correlations } \keyword{multivariate} \keyword{ hplot } psych/man/VSS.plot.Rd0000644000176200001440000000427613256544711014113 0ustar liggesusers\name{VSS.plot} \alias{VSS.plot} \title{Plot VSS fits} \description{The Very Simple Structure criterion ( \code{\link{VSS}}) for estimating the optimal number of factors is plotted as a function of the increasing complexity and increasing number of factors. } \usage{ VSS.plot(x, title = "Very Simple Structure", line = FALSE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{output from VSS } \item{title}{any title } \item{line}{ connect different complexities } } \details{Item-factor models differ in their "complexity". Complexity 1 means that all except the greatest (absolute) loading for an item are ignored. Basically a cluster model (e.g., \code{\link{ICLUST}}). Complexity 2 implies all except the greatest two, etc. Different complexities can suggest different number of optimal number of factors to extract. For personality items, complexity 1 and 2 are probably the most meaningful. The Very Simple Structure criterion will tend to peak at the number of factors that are most interpretable for a given level of complexity. Note that some problems, the most interpretable number of factors will differ as a function of complexity. For instance, when doing the Harman 24 psychological variable problems, an unrotated solution of complexity one suggests one factor (g), while a complexity two solution suggests that a four factor solution is most appropriate. This latter probably reflects a bi-factor structure. For examples of VSS.plot output, see \url{https://personality-project.org/r/r.vss.html} } \value{A plot window showing the VSS criterion varying as the number of factors and the complexity of the items. } \references{ \url{https://personality-project.org/r/r.vss.html}} \author{ Maintainer: William Revelle \email{revelle@northwestern.edu} } \seealso{ \code{\link{VSS}}, \code{\link{ICLUST}}, \code{\link{omega}}} \examples{ test.data <- Harman74.cor$cov my.vss <- VSS(test.data) #suggests that 4 factor complexity two solution is optimal VSS.plot(my.vss,title="VSS of Holzinger-Harmon problem") #see the graphics window } \keyword{ multivariate }% at least one, from doc/KEYWORDS \keyword{ models }% __ONLY ONE__ keyword per line psych/man/omega.Rd0000744000176200001440000007061514524267333013555 0ustar liggesusers\name{omega} \alias{omega} \alias{omegaSem} \alias{omegaFromSem} \alias{omegah} \alias{omegaDirect} \alias{directSl} \title{ Calculate McDonald's omega estimates of general and total factor saturation } \description{McDonald has proposed coefficient omega as an estimate of the general factor saturation of a test. One way to find omega is to do a factor analysis of the original data set, rotate the factors obliquely, do a Schmid Leiman transformation, and then find omega. This function estimates omega as suggested by McDonald by using hierarchical factor analysis (following Jensen). A related option is to define the model using omega and then perform a confirmatory (bi-factor) analysis using the sem or lavaan packages. This is done by omegaSem and omegaFromSem. omegaFromSem will convert appropriate sem/lavaan objects to find omega. Yet another option is to do the direct Schmid-Leiman of Waller. } \usage{ omega(m,nfactors=3,fm="minres",n.iter=1,p=.05,poly=FALSE,key=NULL, flip=TRUE,digits=2, title="Omega",sl=TRUE,labels=NULL, plot=TRUE,n.obs=NA,rotate="oblimin",Phi=NULL,option="equal",covar=FALSE, ...) omegaSem(m,nfactors=3,fm="minres",key=NULL,flip=TRUE,digits=2,title="Omega", sl=TRUE,labels=NULL, plot=TRUE,n.obs=NA,rotate="oblimin", Phi = NULL, option="equal",lavaan=TRUE,...) omegah(m,nfactors=3,fm="minres",key=NULL,flip=TRUE, digits=2,title="Omega",sl=TRUE,labels=NULL, plot=TRUE, n.obs=NA,rotate="oblimin",Phi = NULL,option="equal",covar=FALSE,two.ok=FALSE,...) omegaFromSem(fit,m=NULL,flip=TRUE,plot=TRUE) omegaDirect(m,nfactors=3,fm="minres",rotate="oblimin",cut=.3, plot=TRUE,main="Direct Schmid Leiman") directSl(m,nfactors=3,fm="minres",rotate="oblimin",cut=.3) } \arguments{ \item{m}{A correlation matrix, or a data.frame/matrix of data, or (if Phi) is specified, an oblique factor pattern matrix } \item{nfactors}{Number of factors believed to be group factors} \item{n.iter}{How many replications to do in omega for bootstrapped estimates} \item{fm}{factor method (the default is minres) fm="pa" for principal axes, fm="minres" for a minimum residual (OLS) solution, fm="pc" for principal components (see note), or fm="ml" for maximum likelihood.} \item{poly}{should the correlation matrix be found using polychoric/tetrachoric or normal Pearson correlations} \item{key}{a vector of +/- 1s to specify the direction of scoring of items. The default is to assume all items are positively keyed, but if some items are reversed scored, then key should be specified.} \item{flip}{If flip is TRUE, then items are automatically flipped to have positive correlations on the general factor. Items that have been reversed are shown with a - sign.} \item{p}{probability of two tailed conference boundaries} \item{digits}{if specified, round the output to digits} \item{title}{Title for this analysis} \item{main}{main for this analysis (directSl)} \item{cut}{Loadings greater than cut are used in directSl} \item{sl}{If plotting the results, should the Schmid Leiman solution be shown or should the hierarchical solution be shown? (default sl=TRUE)} \item{labels}{If plotting, what labels should be applied to the variables? If not specified, will default to the column names.} \item{plot}{plot=TRUE (default) calls omega.diagram, plot =FALSE does not. If Rgraphviz is available, then \code{\link{omega.graph}} may be used separately.} \item{n.obs}{Number of observations - used for goodness of fit statistic} \item{rotate}{What rotation to apply? The default is oblimin, the alternatives include simplimax, Promax, cluster and target. target will rotate to an optional keys matrix (See \code{\link{target.rot}})} \item{Phi}{If specified, then omega is found from the pattern matrix (m) and the factor intercorrelation matrix (Phi).} \item{option}{In the two factor case (not recommended), should the loadings be equal, emphasize the first factor, or emphasize the second factor. See in particular the option parameter in \code{\link{schmid}} for treating the case of two group factors.} \item{covar}{defaults to FALSE and the correlation matrix is found (standardized variables.) If TRUE, the do the calculations on the unstandardized variables and use covariances.} \item{two.ok}{If TRUE, do not give a warning about 3 factors being required. } \item{lavaan}{if FALSE, will use John Fox's sem package to do the omegaSem. If TRUE, will use Yves Rosseel's lavaan package. } \item{fit}{The fitted object from lavaan or sem. For lavaan, this includes the correlation matrix and the variable names and thus m needs not be specified.} \item{...}{Allows additional parameters to be passed through to the factor routines. } } \details{``Many scales are assumed by their developers and users to be primarily a measure of one latent variable. When it is also assumed that the scale conforms to the effect indicator model of measurement (as is almost always the case in psychological assessment), it is important to support such an interpretation with evidence regarding the internal structure of that scale. In particular, it is important to examine two related properties pertaining to the internal structure of such a scale. The first property relates to whether all the indicators forming the scale measure a latent variable in common. The second internal structural property pertains to the proportion of variance in the scale scores (derived from summing or averaging the indicators) accounted for by this latent variable that is common to all the indicators (Cronbach, 1951; McDonald, 1999; Revelle, 1979). That is, if an effect indicator scale is primarily a measure of one latent variable common to all the indicators forming the scale, then that latent variable should account for the majority of the variance in the scale scores. Put differently, this variance ratio provides important information about the sampling fluctuations when estimating individuals' standing on a latent variable common to all the indicators arising from the sampling of indicators (i.e., when dealing with either Type 2 or Type 12 sampling, to use the terminology of Lord, 1956). That is, this variance proportion can be interpreted as the square of the correlation between the scale score and the latent variable common to all the indicators in the infinite universe of indicators of which the scale indicators are a subset. Put yet another way, this variance ratio is important both as reliability and a validity coefficient. This is a reliability issue as the larger this variance ratio is, the more accurately one can predict an individual's relative standing on the latent variable common to all the scale's indicators based on his or her observed scale score. At the same time, this variance ratio also bears on the construct validity of the scale given that construct validity encompasses the internal structure of a scale." (Zinbarg, Yovel, Revelle, and McDonald, 2006). McDonald has proposed coefficient omega_hierarchical (\eqn{\omega_h}) as an estimate of the general factor saturation of a test. Zinbarg, Revelle, Yovel and Li (2005) \url{https://personality-project.org/revelle/publications/zinbarg.revelle.pmet.05.pdf} compare McDonald's \eqn{\omega_h} to Cronbach's \eqn{\alpha} and Revelle's \eqn{\beta}. They conclude that \eqn{\omega_h} is the best estimate. (See also Zinbarg et al., 2006 and Revelle and Zinbarg (2009)). One way to find \eqn{\omega_h}{omega_h} is to do a factor analysis of the original data set, rotate the factors obliquely, factor that correlation matrix, do a Schmid-Leiman (\link{schmid}) transformation to find general factor loadings, and then find \eqn{\omega_h}{omega_h}. Here we present code to do that. \eqn{\omega_h}{omega_h} differs as a function of how the factors are estimated. Four options are available, three use the \code{\link{fa}} function but with different factoring methods: the default does a minres factor solution, fm="pa" does a principle axes factor analysis fm="mle" does a maximum likelihood solution; fm="pc" does a principal components analysis using (\code{\link{principal}}). For ability items, it is typically the case that all items will have positive loadings on the general factor. However, for non-cognitive items it is frequently the case that some items are to be scored positively, and some negatively. Although probably better to specify which directions the items are to be scored by specifying a key vector, if flip =TRUE (the default), items will be reversed so that they have positive loadings on the general factor. The keys are reported so that scores can be found using the \code{\link{scoreItems}} function. Arbitrarily reversing items this way can overestimate the general factor. (See the example with a simulated circumplex). \eqn{\beta}{beta}, an alternative to \eqn{\omega_h}, is defined as the worst split half reliability (Revelle, 1979). It can be estimated by using \code{\link{ICLUST}} (a hierarchical clustering algorithm originally developed for main frames and written in Fortran and that is now part of the psych package. (For a very complimentary review of why the ICLUST algorithm is useful in scale construction, see Cooksey and Soutar, 2005)). The \code{\link{omega}} function uses exploratory factor analysis to estimate the \eqn{\omega_h} coefficient. It is important to remember that ``A recommendation that should be heeded, regardless of the method chosen to estimate \eqn{\omega_h}, is to always examine the pattern of the estimated general factor loadings prior to estimating \eqn{\omega_h}. Such an examination constitutes an informal test of the assumption that there is a latent variable common to all of the scale's indicators that can be conducted even in the context of EFA. If the loadings were salient for only a relatively small subset of the indicators, this would suggest that there is no true general factor underlying the covariance matrix. Just such an informal assumption test would have afforded a great deal of protection against the possibility of misinterpreting the misleading \eqn{\omega_h} estimates occasionally produced in the simulations reported here." (Zinbarg et al., 2006, p 137). A simple demonstration of the problem of an omega estimate reflecting just one of two group factors can be found in the last example. Diagnostic statistics that reflect the quality of the omega solution include a comparison of the relative size of the g factor eigen value to the other eigen values, the percent of the common variance for each item that is general factor variance (p2), the mean of p2, and the standard deviation of p2. Further diagnostics can be done by describing (\link{describe}) the $schmid$sl results. Although omega_h is uniquely defined only for cases where 3 or more subfactors are extracted, it is sometimes desired to have a two factor solution. By default this is done by forcing the schmid extraction to treat the two subfactors as having equal loadings. There are three possible options for this condition: setting the general factor loadings between the two lower order factors to be "equal" which will be the sqrt(oblique correlations between the factors) or to "first" or "second" in which case the general factor is equated with either the first or second group factor. A message is issued suggesting that the model is not really well defined. This solution discussed in Zinbarg et al., 2007. To do this in omega, add the option="first" or option="second" to the call. Although obviously not meaningful for a 1 factor solution, it is of course possible to find the sum of the loadings on the first (and only) factor, square them, and compare them to the overall matrix variance. This is done, with appropriate complaints. In addition to \eqn{\omega_h}, another of McDonald's coefficients is \eqn{\omega_t}. This is an estimate of the total reliability of a test. McDonald's \eqn{\omega_t}, which is similar to Guttman's \eqn{\lambda_6}, \code{\link{guttman}} but uses the estimates of uniqueness (\eqn{u^2}) from factor analysis to find \eqn{e_j^2}. This is based on a decomposition of the variance of a test score, \eqn{V_x} into four parts: that due to a general factor, \eqn{\vec{g}}, that due to a set of group factors, \eqn{\vec{f}}, (factors common to some but not all of the items), specific factors, \eqn{\vec{s}} unique to each item, and \eqn{\vec{e}}, random error. (Because specific variance can not be distinguished from random error unless the test is given at least twice, some combine these both into error). Letting \eqn{\vec{x} = \vec{cg} + \vec{Af} + \vec {Ds} + \vec{e}}{x = cg + Af + Ds + e} then the communality of item\eqn{_j}, based upon general as well as group factors, \eqn{h_j^2 = c_j^2 + \sum{f_{ij}^2}}{h_j^2 = c_j^2 + sum(f_ij^2)} and the unique variance for the item \eqn{u_j^2 = \sigma_j^2 (1-h_j^2)} may be used to estimate the test reliability. That is, if \eqn{h_j^2} is the communality of item\eqn{_j}, based upon general as well as group factors, then for standardized items, \eqn{e_j^2 = 1 - h_j^2} and \deqn{ \omega_t = \frac{\vec{1}\vec{cc'}\vec{1} + \vec{1}\vec{AA'}\vec{1}'}{V_x} = 1 - \frac{\sum(1-h_j^2)}{V_x} = 1 - \frac{\sum u^2}{V_x}}{\omega_t = (1 cc' 1 + 1 AA' 1')/(V_x)} Because \eqn{h_j^2 \geq r_{smc}^2}, \eqn{\omega_t \geq \lambda_6}. It is important to distinguish here between the two \eqn{\omega} coefficients of McDonald, 1978 and Equation 6.20a of McDonald, 1999, \eqn{\omega_t} and \eqn{\omega_h}. While the former is based upon the sum of squared loadings on all the factors, the latter is based upon the sum of the squared loadings on the general factor. \deqn{\omega_h = \frac{ \vec{1}\vec{cc'}\vec{1}}{V_x}}{\omega_h = (1 cc' 1')/Vx} Another estimate reported is the omega for an infinite length test with a structure similar to the observed test (omega H asymptotic). This is found by \deqn{\omega_{limit} = \frac{\vec{1}\vec{cc'}\vec{1}}{\vec{1}\vec{cc'}\vec{1} + \vec{1}\vec{AA'}\vec{1}'}}{\omega_{limit} = (1 cc' 1')/(1 cc' 1' + 1 AA' 1')}. Following suggestions by Steve Reise, the Explained Common Variance (ECV) is also reported. This is the ratio of the general factor eigen value to the sum of all of the eigen values. As such, it is a better indicator of unidimensionality than of the amount of test variance accounted for by a general factor. The input to omega may be a correlation matrix or a raw data matrix, or a factor pattern matrix with the factor intercorrelations (Phi) matrix. \code{\link{omega}} is an exploratory factor analysis function that uses a Schmid-Leiman transformation. \code{\link{omegaSem}} first calls \code{\link{omega}} and then takes the Schmid-Leiman solution, converts this to a confirmatory sem model and then calls the sem package to conduct a confirmatory model. \eqn{\omega_h} is then calculated from the CFA output. Although for well behaved problems, the efa and cfa solutions will be practically identical, the CFA solution will not always agree with the EFA solution. In particular, the estimated \eqn{R^2} will sometimes exceed 1. (An example of this is the Harman 24 cognitive abilities problem.) In addition, not all EFA solutions will produce workable CFA solutions. Model misspecifications will lead to very strange CFA estimates. It is also possible to give \code{\link{omega}} a factor pattern matrix and the associated factor intercorrelation. In this case, the analysis will be done on these matrices. This is particularly useful if one is not satisfied with the exploratory EFA solutions and rotation options and somehow comes up with an alternative. (For instance, one might want to do a EFA using fm='pa' with a Kaiser normalized Promax solution with a specified m value.) \code{\link{omegaFromSem}} takes the output from a sem model and uses it to find \eqn{\omega_h}. The estimate of factor indeterminacy, found by the multiple \eqn{R^2} of the variables with the factors, will not match that found by the EFA model. In particular, the estimated \eqn{R^2} will sometimes exceed 1. (An example of this is the Harman 24 cognitive abilities problem.) The notion of omega may be applied to the individual factors as well as the overall test. A typical use of omega is to identify subscales of a total inventory. Some of that variability is due to the general factor of the inventory, some to the specific variance of each subscale. Thus, we can find a number of different omega estimates: what percentage of the variance of the items identified with each subfactor is actually due to the general factor. What variance is common but unique to the subfactor, and what is the total reliable variance of each subfactor. These results are reported in omega.group object and in the last few lines of the normal output. Finally, and still being tested, is \code{\link{omegaDirect}} adapted from Waller (2017). This is a direct rotation to a Schmid-Leiman like solution without doing the hierarchical factoring (\code{\link{directSl}}). This rotation is then interpreted in terms of omega. It is included here to allow for comparisons with the alternative procedures \code{\link{omega}} and \code{\link{omegaSem}}. Preliminary analyses suggests that it produces inappropriate solutions for the case where there is no general factor. Moral: Finding omega_h is tricky and one should probably compare \code{\link{omega}}, \code{\link{omegaSem}}, \code{\link{omegaDirect}} and even \code{\link{iclust}} solutions to understand the differences. The summary of the omega object is a reduced set of the most useful output. The various objects returned from omega include: } \value{ \item{omega hierarchical}{The \eqn{\omega_h} coefficient} \item{omega.lim}{The limit of \eqn{\omega_h} as the test becomes infinitly large} \item{omega total}{The \eqn{omega_t} coefficient} \item{alpha}{Cronbach's \eqn{\alpha}} \item{schmid}{The Schmid Leiman transformed factor matrix and associated matrices} \item{schmid$sl}{The g factor loadings as well as the residualized factors} \item{schmid$orthog}{Varimax rotated solution of the original factors} \item{schmid$oblique}{The oblimin or promax transformed factors} \item{schmid$phi}{the correlation matrix of the oblique factors} \item{schmid$gloading}{The loadings on the higher order, g, factor of the oblimin factors} \item{key}{A vector of -1 or 1 showing which direction the items were scored.} \item{model}{a list of two elements, one suitable to give to the sem function for structure equation models, the other, to give to the lavaan package. } \item{sem}{The output from a sem analysis} \item{omega.group}{The summary statistics for the omega total, omega hierarchical (general) and omega within each group.} \item{scores}{Factor score estimates are found for the Schmid-Leiman solution. To get scores for the hierarchical model see the note.} \item{various fit statistics}{various fit statistics, see output} \item{OmegaSem}{ is an object that contains the fits for the OmegaSem output.} \item{loadings}{The direct SL rotated object (from omegaDirect)} \item{orth.f}{The original, unrotated solution from omegaDirect} \item{Target}{The cluster based target for rotation in directSl} } \references{ \url{https://personality-project.org/r/r.omega.html} \cr Jensen, Arthur R. and Li-Jen Weng (1994) What is a good g? Intelligence, 18, 3, 231-258 Revelle, William. (in prep) An introduction to psychometric theory with applications in R. Springer. Working draft available at \url{https://personality-project.org/r/book/} Revelle, W. (1979). Hierarchical cluster analysis and the internal structure of tests. Multivariate Behavioral Research, 14, 57-74. (\url{https://personality-project.org/revelle/publications/iclust.pdf}) Revelle, W. and Condon, D.M. (2019) Reliability from alpha to omega: A tutorial. Psychological Assessment, 31, 12, 1395-1411. https://doi.org/10.1037/pas0000754. \url{https://osf.io/preprints/psyarxiv/2y3w9} Preprint available from PsyArxiv Revelle, W. and Zinbarg, R. E. (2009) Coefficients alpha, beta, omega and the glb: comments on Sijtsma. Psychometrika, 74, 1, 145-154. (\url{https://personality-project.org/revelle/publications/rz09.pdf} Waller, N. G. (2017) Direct {Schmid-Leiman} Transformations and Rank-Deficient Loadings Matrices. Psychometrika. DOI: 10.1007/s11336-017-9599-0 Zinbarg, R.E., Revelle, W., Yovel, I., & Li. W. (2005). Cronbach's Alpha, Revelle's Beta, McDonald's Omega: Their relations with each and two alternative conceptualizations of reliability. Psychometrika. 70, 123-133. \url{https://personality-project.org/revelle/publications/zinbarg.revelle.pmet.05.pdf} Zinbarg, R., Yovel, I. & Revelle, W. (2007). Estimating omega for structures containing two group factors: Perils and prospects. Applied Psychological Measurement. 31 (2), 135-157. Zinbarg, R., Yovel, I., Revelle, W. & McDonald, R. (2006). Estimating generalizability to a universe of indicators that all have one attribute in common: A comparison of estimators for omega. Applied Psychological Measurement, 30, 121-144. DOI: 10.1177/0146621605278814 } \author{ \url{https://personality-project.org/revelle.html} \cr Maintainer: William Revelle \email{ revelle@northwestern.edu } } \note{Requires the GPArotation package. The default rotation uses oblimin from the GPArotation package. Alternatives include the simplimax function, as well as \code{\link{Promax}} or the \code{\link{promax}} rotations. promax will do a Kaiser normalization before applying Promax rotation. If the factor solution leads to an exactly orthogonal solution (probably only for demonstration data sets), then use the rotate="Promax" option to get a solution. \code{\link{omegaSem}} requires the sem or lavaan packages. \code{\link{omegaFromSem}} uses the output from the sem or lavaan package. \code{\link{omega}} may be run on raw data (finding either Pearson or tetrachoric/polychoric corrlations, depending upon the poly option) a correlation matrix, a polychoric correlation matrix (found by e.g., \code{\link{polychoric}}), or the output of a previous omega run. This last case is particularly useful when working with categorical data using the poly=TRUE option. For in this case, most of the time is spent in finding the correlation matrix. The matrix is saved as part of the omega output and may be used as input for subsequent runs. A similar feature is found in \code{\link{irt.fa}} where the output of one analysis can be taken as the input to the subsequent analyses. However, simulations based upon tetrachoric and polychoric correlations suggest that although the structure is better defined, that the estimates of omega are inflated over the true general factor saturation. Omega returns factor scores based upon the Schmid-Leiman transformation. To get the hierarchical factor scores, it is necessary to do this outside of omega. See the example (not run). Consider the case of the raw data in an object data. Then f3 <- fa(data,3,scores="tenBerge", oblique.rotation=TRUE f1 <- fa(f3$scores) hier.scores <- data.frame(f1$scores,f3$scores) When doing fm="pc", principal components are done for the original correlation matrix, but minres is used when examining the intercomponent correlations. A warning is issued that the method was changed to minres for the higher order solution. omega is a factor model, and finding loadings using principal components will overestimate the resulting solution. This is particularly problematic for the amount of group saturation, and thus the omega.group statistics are overestimates. The last three lines of omega report "Total, General and Subset omega for each subset". These are available as the omega.group object in the output. The last of these (omega group) is effectively what Steve Reise calls omegaS for the subset omega. The omega general is the amount of variance in the group that is accounted for by the general factor, the omega total is the amount of variance in the group accounted for by general + group. This is based upon a cluster solution (that is to say, every item is assigned to one group) and this is why for first column the omega general and group do not add up to omega total. Some of the variance is found in the cross loadings between groups. Reise and others like to report the ratio of the second line to the first line (what portion of the reliable variance is general factor) and the third row to the first (what portion of the reliable variance is within group but not general. This may be found by using the omega.group object that is returned by omega. (See the last example.) If using the lavaan=TRUE option in \code{\link{omegaSem}} please note that variable names can not start with a digit (e.g. 4.Letter.Words in the \code{\link{Thurstone}} data set. The leading digit needs to be removed. \code{\link{omegaSem}} will do an exploratory efa and omega, create (and return) the commands for doing either a sem or lavaan analysis. The commands are returned as the model object. This can be used for further sem/lavaan analyses. Omega can also be found from an analysis done using lavaan or sem directly by calling \code{\link{omegaFromSem}} with the original correlation matrix and the fit of the sem/lavaan model. See the last (not run) example) } \seealso{ \code{\link{omega.graph}} for a dot code graphic. \code{\link{ICLUST}}, \code{\link{ICLUST.diagram}} for hierarchical cluster analysis, \code{\link{VSS}}, \code{\link{nfactors}}, \code{\link{fa.parallel}} for alternative ways of estimating the appropriate number of factors. \code{\link{schmid} } for the decomposition used in omega. \code{\link{make.hierarchical}} to simulate a hierarchical model. \code{\link{fa.multi}} for hierarchical factor analysis with an arbitrary number of 2nd order factors. \code{\link{reliability}} to do multiple omega analysis at the same time on different subsets of items. } \examples{ \dontrun{ test.data <- Harman74.cor$cov # if(!require(GPArotation)) {message("Omega requires GPA rotation" )} else { my.omega <- omega(test.data) print(my.omega,digits=2) #} #create 9 variables with a hierarchical structure v9 <- sim.hierarchical() #with correlations of round(v9,2) #find omega v9.omega <- omega(v9,digits=2) v9.omega #create 8 items with a two factor solution, showing the use of the flip option sim2 <- item.sim(8) omega(sim2) #an example of misidentification-- remember to look at the loadings matrices. omega(sim2,2) #this shows that in fact there is no general factor omega(sim2,2,option="first") #but, if we define one of the two group factors #as a general factor, we get a falsely high omega #apply omega to analyze 6 mental ability tests data(ability.cov) #has a covariance matrix omega(ability.cov$cov) #om <- omega(Thurstone) #round(om$omega.group,2) #round(om$omega.group[2]/om$omega.group[1],2) #fraction of reliable that is general variance # round(om$omega.group[3]/om$omega.group[1],2) #fraction of reliable that is group variance #To find factor score estimates for the hierarchical model it is necessary to #do two extra steps. #Consider the case of the raw data in an object data. (An example from simulation) # set.seed(42) # gload <- matrix(c(.9,.8,.7),nrow=3) # fload <- matrix(c(.8,.7,.6,rep(0,9),.7,.6,.5,rep(0,9),.7,.6,.4), ncol=3) # data <- sim.hierarchical(gload=gload,fload=fload, n=100000, raw=TRUE) # # f3 <- fa(data$observed,3,scores="tenBerge", oblique.scores=TRUE) # f1 <- fa(f3$scores) # om <- omega(data$observed,sl=FALSE) #draw the hierarchical figure # The scores from om are based upon the Schmid-Leiman factors and although the g factor # is identical, the group factors are not. # This is seen in the following correlation matrix # hier.scores <- cbind(om$scores,f1$scores,f3$scores) # lowerCor(hier.scores) # #this next set of examples require lavaan #jensen <- sim.hierarchical() #create a hierarchical structure (same as v9 above) #om.jen <- omegaSem(jensen,lavaan=TRUE) #do the exploratory omega with confirmatory as well #lav.mod <- om.jen$omegaSem$model$lavaan #get the lavaan code or create it yourself # lav.mod <- 'g =~ +V1+V2+V3+V4+V5+V6+V7+V8+V9 # F1=~ + V1 + V2 + V3 # F2=~ + V4 + V5 + V6 # F3=~ + V7 + V8 + V9 ' #lav.jen <- cfa(lav.mod,sample.cov=jensen,sample.nobs=500,orthogonal=TRUE,std.lv=TRUE) # omegaFromSem(lav.jen,jensen) #the directSl solution #direct.jen <- directSl(jen) #direct.jen #try a one factor solution -- this is not recommended, but sometimes done #it will just give omega_total # lav.mod.1 <- 'g =~ +V1+V2+V3+V4+V5+V6+V7+V8+V9 ' #lav.jen.1<- cfa(lav.mod.1,sample.cov=jensen,sample.nobs=500,orthogonal=TRUE,std.lv=TRUE) # omegaFromSem(lav.jen.1,jensen) } } \keyword{ multivariate } \keyword{ models } psych/man/fa.extension.Rd0000644000176200001440000002636014533707555015070 0ustar liggesusers\name{fa.extension} \alias{fa.extension} \alias{fa.extend} \alias{faRegression} \alias{faReg} \title{Apply Dwyer's factor extension to find factor loadings for extended variables} \description{Dwyer (1937) introduced a method for finding factor loadings for variables not included in the original analysis. This is basically finding the unattenuated correlation of the extension variables with the factor scores. An alternative, which does not correct for factor reliability was proposed by Gorsuch (1997). Both options are an application of exploratory factor analysis with extensions to new variables. Also useful for finding the validities of variables in the factor space. } \usage{ fa.extension(Roe,fo,correct=TRUE) fa.extend(r,nfactors=1,ov=NULL,ev=NULL,n.obs = NA, np.obs=NULL, correct=TRUE,rotate="oblimin",SMC=TRUE, warnings=TRUE, fm="minres", alpha=.1,omega=FALSE,cor="cor",use="pairwise",cor.correct=.5,weight=NULL, missing=FALSE, smooth=TRUE, ...) faRegression(r,nfactors=1,ov=NULL,dv=NULL, n.obs = NA , np.obs=NULL,correct=TRUE,rotate="oblimin",SMC=TRUE,warnings=TRUE, fm="minres",alpha=.1, omega=FALSE,cor="cor",use="pairwise",cor.correct=.5,weight=NULL,smooth=TRUE, ...) #this is just an alias for faReg(r,nfactors=1,ov=NULL,dv=NULL, n.obs = NA , np.obs=NULL,correct=TRUE,rotate="oblimin",SMC=TRUE,warnings=TRUE, fm="minres",alpha=.1, omega=FALSE,cor="cor",use="pairwise",cor.correct=.5,weight=NULL,smooth=TRUE, ...) } \arguments{ \item{Roe}{The correlations of the original variables with the extended variables} \item{fo}{The output from the \code{\link{fa}} or \code{\link{omega}} functions applied to the original variables.} \item{correct}{correct=TRUE produces Dwyer's solution, correct=FALSE does not correct for factor reliability. This is not quite the Gorsuch technique.} \item{r}{A correlation or data matrix with all of the variables to be analyzed by fa.extend} \item{ov}{The original variables to factor} \item{ev}{The extension variables} \item{dv}{The dependent variables if doing faRegression} \item{nfactors}{ Number of factors to extract, default is 1 } \item{n.obs}{Number of observations used to find the correlation matrix if using a correlation matrix. Used for finding the goodness of fit statistics. Must be specified if using a correlaton matrix and finding confidence intervals.} \item{np.obs}{Pairwise number of observations. Required if using fm="minchi", suggested in other cases to estimate the empirical goodness of fit.} \item{rotate}{"none", "varimax", "quartimax", "bentlerT", "geominT" and "bifactor" are orthogonal rotations. "promax", "oblimin", "simplimax", "bentlerQ, "geominQ" and "biquartimin" and "cluster" are possible rotations or transformations of the solution. The default is to do a oblimin transformation, although versions prior to 2009 defaulted to varimax.} \item{SMC}{Use squared multiple correlations (SMC=TRUE) or use 1 as initial communality estimate. Try using 1 if imaginary eigen values are reported. If SMC is a vector of length the number of variables, then these values are used as starting values in the case of fm='pa'. } \item{warnings}{warnings=TRUE => warn if number of factors is too many } \item{fm}{factoring method fm="minres" will do a minimum residual (OLS), fm="wls" will do a weighted least squares (WLS) solution, fm="gls" does a generalized weighted least squares (GLS), fm="pa" will do the principal factor solution, fm="ml" will do a maximum likelihood factor analysis. fm="minchi" will minimize the sample size weighted chi square when treating pairwise correlations with different number of subjects per pair.} \item{alpha}{alpha level for the confidence intervals for RMSEA} \item{omega}{Do the extension analysis for an omega type analysis} \item{cor}{Pass the kind of correlation to fa (defaults to Pearson, can use mixed)} \item{use}{Option for the cor function on how to handle missing data.} \item{cor.correct}{The correction to be passed to mixed, tet, or polycor (defaults to .5)} \item{weight}{Should we weight the variables? (see \code{\link{fa}})} \item{missing}{When finding factor scores, are missing data allowed?} \item{smooth}{Should we smooth the correlation matrix -- smoothing produces bad output if there are NAs in the R matrix } \item{...}{Additional parameters, specifically, keys may be passed if using the target rotation, or delta if using geominQ, or whether to normalize if using Varimax} } \details{It is sometimes the case that factors are derived from a set of variables (the Fo factor loadings) and we want to see what the loadings of an extended set of variables (Fe) would be. Given the original correlation matrix Ro and the correlation of these original variables with the extension variables of Roe, it is a straight forward calculation to find the loadings Fe of the extended variables on the original factors. This technique was developed by Dwyer (1937) for the case of adding new variables to a factor analysis without doing all the work over again. But, as discussed by Horn (1973) factor extension is also appropriate when one does not want to include the extension variables in the original factor analysis, but does want to see what the loadings would be anyway. This could be done by estimating the factor scores and then finding the covariances of the extension variables with the factor scores. If raw data are available, this is done by \code{\link{faReg}}. But if the original data are not available, but just the covariance or correlation matrix is, then the use of \code{\link{fa.extension}} is most appropriate to estimate those correlations before doing the regression. The factor analysis results from either \code{\link{fa}} or \code{\link{omega}} functions applied to the original correlation matrix is extended to the extended variables given the correlations (Roe) of the extended variables with the original variables. \code{\link{fa.extension}} assumes that the original factor solution was found by the \code{\link{fa}} function. For a very nice discussion of the relationship between factor scores, correlation matrices, and the factor loadings in a factor extension, see Horn (1973). The \code{\link{fa.extend}} function may be thought of as a "seeded" factor analysis. That is, the variables in the original set are factored, this solution is then extended to the extension set, and the resulting output is presented as if both the original and extended variables were factored together. This may also be done for an omega analysis. The example of \code{\link{fa.extend}} compares the extended solution to a direct solution of all of the variables using \code{\link{factor.congruence}}. Another important use of factor extension is when parts of a correlation matrix are missing. Thus suppose you have a correlation matrix formed of variables 1 .. i, j..n where the first i variables correlate with each other (R11) and with the j... n (R12) variables, but the elements of of the R22 (j..n) are missing. That is, X has a missing data structure but we can find the correlation matrix R[1..n,1..n] =cor(X,na.rm=TRUE) R[1..n,1..n] = \tabular{rr}{ R[1..i,1..i] \tab R[1..i,j..n] \cr R[j..n,1..i] \tab NA \cr } Factors can be found for the R11 matrix and then extended to the variables in the entire matrix. This allows for irt approaches to be applied even with significantly missing data. f <- fa.extend(R,nf=1, ov=1:i,ev=j:n) Then the loadings of n loadings of f may be found. Combining this procedure with \code{\link{fa2irt}} allows us to find the irt based parameters of the n variables, even though we have substantially incomplete data. Using the idea behind factor extension it is straightforward to apply these techniques to multiple regression, because the extended loadings are functionally beta weights. . \code{\link{faRegression}} just organizes the extension analysis in terms of a regression analysis. If the raw data are available, it first finds the factor scores and then correlates these with the dependent variable in the standard regression approach. But, if just the correlation matrix is available, it estimates the factor by dependent variable correlations by the use of \code{link{fa.extend}} before doing the regression. } \value{Factor Loadings of the exended variables on the original factors} \references{Paul S. Dwyer (1937) The determination of the factor loadings of a given test from the known factor loadings of other tests. Psychometrika, 3, 173-178 Gorsuch, Richard L. (1997) New procedure for extension analysis in exploratory factor analysis, Educational and Psychological Measurement, 57, 725-740 Horn, John L. (1973) On extension analysis and its relation to correlations between variables and factor scores. Multivariate Behavioral Research, 8, (4), 477-489. } \author{William Revelle } \seealso{ See Also as \code{\link{fa}}, \code{\link{principal}}, \code{\link{Dwyer}} } \examples{ #The Dwyer Example Ro <- Dwyer[1:7,1:7] Roe <- Dwyer[1:7,8] fo <- fa(Ro,2,rotate="none") fe <- fa.extension(Roe,fo) #an example from simulated data set.seed(42) d <- sim.item(12) #two orthogonal factors R <- cor(d) Ro <- R[c(1,2,4,5,7,8,10,11),c(1,2,4,5,7,8,10,11)] Roe <- R[c(1,2,4,5,7,8,10,11),c(3,6,9,12)] fo <- fa(Ro,2) fe <- fa.extension(Roe,fo) fa.diagram(fo,fe=fe) #alternatively just specify the original variables and the extension variables fe = fa.extend(R, 2, ov =c(1,2,4,5,7,8,10,11), ev=c(3,6,9,12)) fa.diagram(fe$fo, fe = fe$fe) #create two correlated factors fx <- matrix(c(.9,.8,.7,.85,.75,.65,rep(0,12),.9,.8,.7,.85,.75,.65),ncol=2) Phi <- matrix(c(1,.6,.6,1),2) sim.data <- sim.structure(fx,Phi,n=1000,raw=TRUE) R <- cor(sim.data$observed) Ro <- R[c(1,2,4,5,7,8,10,11),c(1,2,4,5,7,8,10,11)] Roe <- R[c(1,2,4,5,7,8,10,11),c(3,6,9,12)] fo <- fa(Ro,2) fe <- fa.extension(Roe,fo) fa.diagram(fo,fe=fe) #now show how fa.extend works with the same data set #note that we have to make sure that the variables are in the order to do the factor congruence fe2 <- fa.extend(sim.data$observed,2,ov=c(1,2,4,5,7,8,10,11),ev=c(3,6,9,12)) fa.diagram(fe2,main="factor analysis with extension variables") fa2 <- fa(sim.data$observed[,c(1,2,4,5,7,8,10,11,3,6,9,12)],2) factor.congruence(fe2,fa2) summary(fe2) #an example of extending an omega analysis fload <- matrix(c(c(c(.9,.8,.7,.6),rep(0,20)),c(c(.9,.8,.7,.6),rep(0,20)),c(c(.9,.8,.7,.6), rep(0,20)),c(c(c(.9,.8,.7,.6),rep(0,20)),c(.9,.8,.7,.6))),ncol=5) gload <- matrix(rep(.7,5)) five.factor <- sim.hierarchical(gload,fload,500,TRUE) #create sample data set ss <- c(1,2,3,5,6,7,9,10,11,13,14,15,17,18,19) Ro <- cor(five.factor$observed[,ss]) Re <- cor(five.factor$observed[,ss],five.factor$observed[,-ss]) om5 <-omega(Ro,5) #the omega analysis om.extend <- fa.extension(Re,om5) #the extension analysis om.extend #show it #now, include it in an omega diagram combined.om <- rbind(om5$schmid$sl[,1:ncol(om.extend$loadings)],om.extend$loadings) class(combined.om) <-c("psych","extend") omega.diagram(combined.om,main="Extended Omega") #show how to use fa.extend to do regression analyses with the raw data b5 <- faReg (bfi, nfactors = 5, ov =1:25, dv =26:28) extension.diagram(b5) R <-cor(bfi,use="pairwise") b5.r <- faReg(R, nfactors = 5, ov =1:25, dv =26:28) # not identical to b5 round(b5$regression$coefficients - b5.r$regression$coefficients,2) } \keyword{ multivariate }psych/man/iclust.diagram.Rd0000644000176200001440000000747414403157402015364 0ustar liggesusers\name{iclust.diagram} \Rdversion{1.1} \alias{iclust.diagram} \alias{ICLUST.diagram} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Draw an ICLUST hierarchical cluster structure diagram } \description{Given a cluster structure determined by \code{\link{ICLUST}}, create a graphic structural diagram using graphic functions in the psych package To create dot code to describe the \code{\link{ICLUST}} output with more precision, use \code{\link{ICLUST.graph}}. If Rgraphviz has been successfully installed, the alternative is to use \code{\link{ICLUST.rgraph}}. } \usage{ iclust.diagram(ic, labels = NULL, short = FALSE, digits = 2, cex = NULL, min.size = NULL, e.size =1, colors=c("black","blue"), main = "ICLUST diagram", cluster.names=NULL,marg=c(.5,.5,1.5,.5),plot=TRUE, bottomup=TRUE) } \arguments{ \item{ic}{Output from ICLUST} \item{labels}{labels for variables (if not specified as rownames in the ICLUST output} \item{short}{if short=TRUE, variable names are replaced with Vn} \item{digits}{Round the path coefficients to digits accuracy} \item{cex}{The standard graphic control parameter for font size modifications. This can be used to make the labels bigger or smaller than the default values.} \item{min.size}{Don't provide statistics for clusters less than min.size} \item{e.size}{size of the ellipses with the cluster statistics.} \item{colors}{postive and negative } \item{main}{The main graphic title} \item{cluster.names}{Normally, clusters are named sequentially C1 ... Cn. If cluster.names are specified, then these values will be used instead.} \item{marg}{Sets the margins to be narrower than the default values. Resets them upon return} \item{plot}{If plot is TRUE, then draw the diagram, if FALSE, then just return the veriable order from the plot} \item{bottomup}{Which way to draw the arrows. TRUE means from the items to the clusters. See note. } } \details{iclust.diagram provides most of the power of \code{\link{ICLUST.rgraph}} without the difficulties involved in installing Rgraphviz. It is called automatically from ICLUST. Following a request by Michael Kubovy, cluster.names may be specified to replace the normal C1 ... Cn names. If access to a dot language graphics program is available, it is probably better to use the iclust.graph function to get dot output for offline editing. Until 3/11/2 arrows went from clusters to items. The default value for bottomup has been changed to draw from items to clusters. To draw the old way, set bottomup=TRUE.} \value{ Graphical output summarizing the hierarchical cluster structure. The graph is drawn using the diagram functions (e.g., \code{\link{dia.curve}}, \code{\link{dia.arrow}}, \code{\link{dia.rect}}, \code{\link{dia.ellipse}} ) created as a work around to Rgraphviz. Also returned (invisibly) is a vector of variable names ordered by their location in the tree diagram. The plot option suppresses the plot for speed. } \references{Revelle, W. Hierarchical Cluster Analysis and the Internal Structure of Tests. Multivariate Behavioral Research, 1979, 14, 57-74. } \author{William Revelle } \note{Suggestions for improving the graphic output are welcome. } \seealso{ \code{\link{ICLUST}} } \examples{ v9 <- sim.hierarchical() v9c <- ICLUST(v9) test.data <- Harman74.cor$cov ic.out <- ICLUST(test.data) #now show how to relabel clusters ic.bfi <- iclust(psychTools::bfi[1:25],beta=3) #find the clusters cluster.names <- rownames(ic.bfi$results) #get the old names #change the names to the desired ones cluster.names[c(16,19,18,15,20)] <- c("Neuroticism","Extra-Open","Agreeableness", "Conscientiousness","Open") #now show the new names iclust.diagram(ic.bfi,cluster.names=cluster.names,min.size=4,e.size=1.75) } \keyword{ multivariate} \keyword{ cluster}% __ONLY ONE__ keyword per line \keyword{hplot} psych/man/smc.Rd0000644000176200001440000000305114001072570013217 0ustar liggesusers\name{smc} \alias{smc} \title{Find the Squared Multiple Correlation (SMC) of each variable with the remaining variables in a matrix} \description{The squared multiple correlation of a variable with the remaining variables in a matrix is sometimes used as initial estimates of the communality of a variable. SMCs are also used when estimating reliability using Guttman's lambda 6 \code{\link{guttman}} coefficient. The SMC is just 1 - 1/diag(R.inv) where R.inv is the inverse of R. } \usage{ smc(R,covar=FALSE) } \arguments{ \item{R}{ A correlation matrix or a dataframe. In the latter case, correlations are found.} \item{covar}{if covar = TRUE and R is either a covariance matrix or data frame, then return the smc * variance for each item} } \value{a vector of squared multiple correlations. Or, if covar=TRUE, a vector of squared multiple correlations * the item variances If the matrix is not invertible, then a vector of 1s is returned. Note, that I now take the left pseudo inverse so this is less likely to happen (if at all). In the case of correlation or covariance matrices with some NAs, those variables with NAs are dropped and the SMC for the remaining variables are found. The missing SMCs are then estimated by finding the maximum correlation for that column (with a warning). } \author{ William Revelle } \seealso{ \code{\link{mat.regress}}, \code{\link{fa}} } \examples{ R <- make.hierarchical() round(smc(R),2) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{ multivariate } psych/man/VSS.Rd0000644000176200001440000002145214374011042013115 0ustar liggesusers\name{VSS} \alias{vss} \alias{VSS} \alias{MAP} \alias{eigenCi} \alias{nfactors} \title{ Apply the Very Simple Structure, MAP, and other criteria to determine the appropriate number of factors.} \description{There are multiple ways to determine the appropriate number of factors in exploratory factor analysis. Routines for the Very Simple Structure (VSS) criterion allow one to compare solutions of varying complexity and for different number of factors. Graphic output indicates the "optimal" number of factors for different levels of complexity. The Velicer MAP criterion is another good choice. \code{\link{nfactors}} finds and plots several of these alternative estimates. } \usage{ vss(x, n = 8, rotate = "varimax", diagonal = FALSE, fm = "minres", n.obs=NULL,plot=TRUE,title="Very Simple Structure",use="pairwise",cor="cor",...) VSS(x, n = 8, rotate = "varimax", diagonal = FALSE, fm = "minres", n.obs=NULL,plot=TRUE,title="Very Simple Structure",use="pairwise",cor="cor",...) nfactors(x,n=20,rotate="varimax",diagonal=FALSE,fm="minres",n.obs=NULL, title="Number of Factors",pch=16,use="pairwise", cor="cor",...) eigenCi(x,n.iter=1000, use="pairwise", alpha=.05,plot=FALSE,root=FALSE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{ a correlation matrix or a data matrix} \item{n}{Number of factors to extract -- should be more than hypothesized! } \item{rotate}{ what rotation to use c("none", "varimax", "oblimin","promax")} \item{diagonal}{Should we fit the diagonal as well } \item{fm}{factoring method -- fm="pa" Principal Axis Factor Analysis, fm = "minres" minimum residual (OLS) factoring fm="mle" Maximum Likelihood FA, fm="pc" Principal Components" } \item{n.obs}{Number of observations if doing a factor analysis of correlation matrix. This value is ignored by VSS but is necessary for the ML factor analysis package.} \item{plot}{plot=TRUE Automatically call VSS.plot with the VSS output, otherwise don't plot} \item{title}{a title to be passed on to VSS.plot} \item{pch}{the plot character for the nfactors plots} \item{use}{If doing covariances or Pearson R, should we use "pairwise" or "complete cases"} \item{cor}{What kind of correlation to find, defaults to Pearson but see fa for the choices} \item{n.iter}{How many iterations of the bootstrap for eigenCi} \item{alpha}{Width of confidence intervals = 1- alpha} \item{root}{By default show the eigen values, if root=TRUE plot the squareroots of the eigen values.} \item{\dots}{parameters to pass to the factor analysis program The most important of these is if using a correlation matrix is covmat= xx} } \details{Determining the most interpretable number of factors from a factor analysis is perhaps one of the greatest challenges in factor analysis. There are many solutions to this problem, none of which is uniformly the best. "Solving the number of factors problem is easy, I do it everyday before breakfast." But knowing the right solution is harder. (Horn and Engstrom, 1979) (Henry Kaiser in personal communication with J.L. Horn, as cited by Horn and Engstrom, 1979, MBR p 283). Techniques most commonly used include 1) Extracting factors until the chi square of the residual matrix is not significant. 2) Extracting factors until the change in chi square from factor n to factor n+1 is not significant. 3) Extracting factors until the eigen values of the real data are less than the corresponding eigen values of a random data set of the same size (parallel analysis) \code{\link{fa.parallel}}. 4) Plotting the magnitude of the successive eigen values and applying the scree test (a sudden drop in eigen values analogous to the change in slope seen when scrambling up the talus slope of a mountain and approaching the rock face. 5) Extracting principal components until the eigen value < 1. 6) Extracting factors as long as they are interpetable. 7) Using the Very Simple Structure Criterion (VSS). 8) Using Wayne Velicer's Minimum Average Partial (MAP) criterion. Each of the procedures has its advantages and disadvantages. Using either the chi square test or the change in square test is, of course, sensitive to the number of subjects and leads to the nonsensical condition that if one wants to find many factors, one simply runs more subjects. Parallel analysis is partially sensitive to sample size in that for large samples the eigen values of random factors will be very small. The scree test is quite appealling but can lead to differences of interpretation as to when the scree "breaks". The eigen value of 1 rule, although the default for many programs, seems to be a rough way of dividing the number of variables by 3. Extracting interpretable factors means that the number of factors reflects the investigators creativity more than the data. VSS, while very simple to understand, will not work very well if the data are very factorially complex. (Simulations suggests it will work fine if the complexities of some of the items are no more than 2). Most users of factor analysis tend to interpret factor output by focusing their attention on the largest loadings for every variable and ignoring the smaller ones. Very Simple Structure operationalizes this tendency by comparing the original correlation matrix to that reproduced by a simplified version (S) of the original factor matrix (F). R = SS' + U2. S is composed of just the c greatest (in absolute value) loadings for each variable. C (or complexity) is a parameter of the model and may vary from 1 to the number of factors. The VSS criterion compares the fit of the simplified model to the original correlations: VSS = 1 -sumsquares(r*)/sumsquares(r) where R* is the residual matrix R* = R - SS' and r* and r are the elements of R* and R respectively. VSS for a given complexity will tend to peak at the optimal (most interpretable) number of factors (Revelle and Rocklin, 1979). Although originally written in Fortran for main frame computers, VSS has been adapted to micro computers (e.g., Macintosh OS 6-9) using Pascal. We now release R code for calculating VSS. Note that if using a correlation matrix (e.g., my.matrix) and doing a factor analysis, the parameters n.obs should be specified for the factor analysis: e.g., the call is VSS(my.matrix,n.obs=500). Otherwise it defaults to 1000. Wayne Velicer's MAP criterion has been added as an additional test for the optimal number of components to extract. Note that VSS and MAP will not always agree as to the optimal number. The nfactors function will do a VSS, find MAP, and report a number of other criteria (e.g., BIC, complexity, chi square, ...) A variety of rotation options are available. These include varimax, promax, and oblimin. Others can be added. Suggestions are welcome. } \value{ A data.frame with entries: map: Velicer's MAP values (lower values are better) \cr dof: degrees of freedom (if using FA) \cr chisq: chi square (from the factor analysis output (if using FA) \cr prob: probability of residual matrix > 0 (if using FA) \cr sqresid: squared residual correlations\cr RMSEA: the RMSEA for each number of factors \cr BIC: the BIC for each number of factors \cr eChiSq: the empirically found chi square \cr eRMS: Empirically found mean residual \cr eCRMS: Empirically found mean residual corrected for df \cr eBIC: The empirically found BIC based upon the eChiSq \cr fit: factor fit of the complete model\cr cfit.1: VSS fit of complexity 1\cr cfit.2: VSS fit of complexity 2 \cr ... \cr cfit.8: VSS fit of complexity 8\cr cresidiual.1: sum squared residual correlations for complexity 1\cr ...: sum squared residual correlations for complexity 2 ..8\cr } \references{ \url{https://personality-project.org/r/vss.html}, Revelle, W. An introduction to psychometric theory with applications in R (in prep) Springer. Draft chapters available at \url{https://personality-project.org/r/book/} Revelle, W. and Rocklin, T. 1979, Very Simple Structure: an Alternative Procedure for Estimating the Optimal Number of Interpretable Factors, Multivariate Behavioral Research, 14, 403-414. \url{https://personality-project.org/revelle/publications/vss.pdf} Velicer, W. (1976) Determining the number of components from the matrix of partial correlations. Psychometrika, 41, 321-327. } \author{William Revelle} \seealso{ \code{\link{VSS.plot}}, \code{\link{ICLUST}}, \code{\link{omega}}, \code{\link{fa.parallel}}} \examples{ #test.data <- Harman74.cor$cov #my.vss <- VSS(test.data,title="VSS of 24 mental tests") #print(my.vss[,1:12],digits =2) #VSS.plot(my.vss, title="VSS of 24 mental tests") #now, some simulated data with two factors #VSS(sim.circ(nvar=24),fm="minres" ,title="VSS of 24 circumplex variables") VSS(sim.item(nvar=24),fm="minres" ,title="VSS of 24 simple structure variables") } \keyword{ multivariate }% at least one, from doc/KEYWORDS \keyword{ models }% __ONLY ONE__ keyword per line psych/man/Yule.Rd0000644000176200001440000001232413463337620013371 0ustar liggesusers\name{Yule} \alias{Yule} \alias{Yule.inv} \alias{Yule2phi} \alias{Yule2tetra} \alias{Yule2poly} \alias{YuleBonett} \alias{YuleCor} \title{From a two by two table, find the Yule coefficients of association, convert to phi, or tetrachoric, recreate table the table to create the Yule coefficient.} \description{One of the many measures of association is the Yule coefficient. Given a two x two table of counts \cr \tabular{llll}{ \tab a \tab b \tab R1 \cr \tab c \tab d \tab R2 \cr \tab C1 \tab C2 \tab n \cr } Yule Q is (ad - bc)/(ad+bc). \cr Conceptually, this is the number of pairs in agreement (ad) - the number in disagreement (bc) over the total number of paired observations. Warren (2008) has shown that Yule's Q is one of the ``coefficients that have zero value under statistical independence, maximum value unity, and minimum value minus unity independent of the marginal distributions" (p 787). \cr ad/bc is the odds ratio and Q = (OR-1)/(OR+1) \cr Yule's coefficient of colligation is Y = (sqrt(OR) - 1)/(sqrt(OR)+1) Yule.inv finds the cell entries for a particular Q and the marginals (a+b,c+d,a+c, b+d). This is useful for converting old tables of correlations into more conventional \code{\link{phi}} or tetrachoric correlations \code{\link{tetrachoric}} \cr Yule2phi and Yule2tetra convert the Yule Q with set marginals to the correponding phi or tetrachoric correlation. Bonett and Price show that the Q and Y coefficients are both part of a general family of coefficients raising the OR to a power (c). If c=1, then this is Yule's Q. If .5, then Yule's Y, if c = .75, then this is Digby's H. They propose that c = .5 - (.5 * min(cell probabilty)^2 is a more general coefficient. YuleBonett implements this for the 2 x 2 case, YuleCor for the data matrix case. } \usage{ YuleBonett(x,c=1,bonett=FALSE,alpha=.05) #find the generalized Yule cofficients YuleCor(x,c=1,bonett=FALSE,alpha=.05) #do this for a matrix Yule(x,Y=FALSE) #find Yule given a two by two table of frequencies #find the frequencies that produce a Yule Q given the Q and marginals Yule.inv(Q,m,n=NULL) #find the phi coefficient that matches the Yule Q given the marginals Yule2phi(Q,m,n=NULL) Yule2tetra(Q,m,n=NULL,correct=TRUE) #Find the tetrachoric correlation given the Yule Q and the marginals #(deprecated) Find the tetrachoric correlation given the Yule Q and the marginals Yule2poly(Q,m,n=NULL,correct=TRUE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{A vector of four elements or a two by two matrix, or, in the case of YuleBonett or YuleCor, this can also be a data matrix } \item{c}{1 returns Yule Q, .5, Yule's Y, .75 Digby's H} \item{bonett}{If FALSE, then find Q, Y, or H, if TRUE, then find the generalized Bonett cofficient} \item{alpha}{The two tailed probability for confidence intervals} \item{Y}{Y=TRUE return Yule's Y coefficient of colligation} \item{Q}{Either a single Yule coefficient or a matrix of Yule coefficients} \item{m}{The vector c(R1,C2) or a two x two matrix of marginals or a four element vector of marginals. The preferred form is c(R1,C1)} \item{n}{The number of subjects (if the marginals are given as frequencies} \item{correct}{When finding a tetrachoric correlation, should small cell sizes be corrected for continuity. See \code{\{link{tetrachoric}} for a discussion.} } \details{Yule developed two measures of association for two by two tables. Both are functions of the odds ratio } \value{ \item{Q}{The Yule Q coefficient} \item{R}{A two by two matrix of counts} \item{result}{If given matrix input, then a matrix of phis or tetrachorics} \item{rho}{From YuleBonett and YuleCor} \item{ci}{The upper and lower confidence intervals in matrix form (From YuleBonett and YuleCor).} } \references{Yule, G. Uday (1912) On the methods of measuring association between two attributes. Journal of the Royal Statistical Society, LXXV, 579-652 Bonett, D.G. and Price, R.M, (2007) Statistical Inference for Generalized Yule Coefficients in 2 x 2 Contingency Tables. Sociological Methods and Research, 35, 429-446. Warrens, Matthijs (2008), On Association Coefficients for 2x2 Tables and Properties That Do Not Depend on the Marginal Distributions. Psychometrika, 73, 777-789. } \author{ William Revelle } \note{Yule.inv is currently done by using the optimize function, but presumably could be redone by solving a quadratic equation. } \seealso{ See Also as \code{\link{phi}}, \code{\link{tetrachoric}}, \code{\link{Yule2poly.matrix}}, \code{\link{Yule2phi.matrix}} } \examples{ Nach <- matrix(c(40,10,20,50),ncol=2,byrow=TRUE) Yule(Nach) Yule.inv(.81818,c(50,60),n=120) Yule2phi(.81818,c(50,60),n=120) Yule2tetra(.81818,c(50,60),n=120) phi(Nach) #much less #or express as percents and do not specify n Nach <- matrix(c(40,10,20,50),ncol=2,byrow=TRUE) Nach/120 Yule(Nach) Yule.inv(.81818,c(.41667,.5)) Yule2phi(.81818,c(.41667,.5)) Yule2tetra(.81818,c(.41667,.5)) phi(Nach) #much less YuleCor(psychTools::ability[,1:4],,TRUE) YuleBonett(Nach,1) #Yule Q YuleBonett(Nach,.5) #Yule Y YuleBonett(Nach,.75) #Digby H YuleBonett(Nach,,TRUE) #Yule* is a generalized Yule } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{multivariate } \keyword{models} psych/man/reverse.code.Rd0000644000176200001440000000263012006776270015036 0ustar liggesusers\name{reverse.code} \alias{reverse.code} \title{Reverse the coding of selected items prior to scale analysis} \description{Some IRT functions require all items to be coded in the same direction. Some data sets have items that need to be reverse coded (e.g., 6 -> 1, 1 -> 6). reverse.code will flip items based upon a keys vector of 1s and -1s. Reversed items are subtracted from the item max + item min. These may be specified or may be calculated. } \usage{ reverse.code(keys, items, mini = NULL, maxi = NULL)} \arguments{ \item{keys}{A vector of 1s and -1s. -1 implies reverse the item} \item{items}{A data set of items} \item{mini}{if NULL, the empirical minimum for each item. Otherwise, a vector of minima} \item{maxi}{f NULL, the empirical maximum for each item. Otherwise, a vector of maxima} } \details{Not a very complicated function, but useful in the case that items need to be reversed prior to using IRT functions from the ltm or eRM packages. Most psych functions do not require reversing prior to analysis, but will do so within the function. } \value{ The corrected items. } \examples{ original <- matrix(sample(6,50,replace=TRUE),10,5) keys <- c(1,1,-1,-1,1) #reverse the 3rd and 4th items new <- reverse.code(keys,original,mini=rep(1,5),maxi=rep(6,5)) original[1:3,] new[1:3,] } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{multivariate}psych/man/sim.structural.Rd0000644000176200001440000001544014306123565015452 0ustar liggesusers\name{sim.structure} \alias{sim.structure} \alias{sim.structural} \alias{sim.correlation} \alias{simCor} \title{Create correlation matrices or data matrices with a particular measurement and structural model } \description{Structural Equation Models decompose correlation or correlation matrices into a measurement (factor) model and a structural (regression) model. sim.structural creates data sets with known measurement and structural properties. Population or sample correlation matrices with known properties are generated. Optionally raw data are produced. It is also possible to specify a measurement model for a set of x variables separately from a set of y variables. They are then combined into one model with the correlation structure between the two sets. Finally, the general case is given a population correlation matrix, generate data that reproduce (with sampling variability) that correlation matrix. \code{\link{simCor}} \code{\link{sim.correlation}}. } \usage{ sim.structure(fx=NULL,Phi=NULL, fy=NULL, f=NULL, n=0, uniq=NULL, raw=TRUE, items = FALSE, low=-2,high=2,d=NULL,cat=5, mu=0) sim.structural(fx=NULL, Phi=NULL, fy=NULL, f=NULL, n=0, uniq=NULL, raw=TRUE, items = FALSE, low=-2,high=2,d=NULL,cat=5, mu=0) #deprecated simCor(R,n=1000,data=FALSE,scale=TRUE, skew=c("none","log","lognormal", "sqrt","abs"),vars=NULL,latent=FALSE,quant=NULL) sim.correlation(R,n=1000,data=FALSE,scale=TRUE, skew=c("none","log","lognormal", "sqrt","abs"),vars=NULL,latent=FALSE,quant=NULL) } \arguments{ \item{fx}{The measurement model for x} \item{Phi}{The structure matrix of the latent variables} \item{fy}{The measurement model for y} \item{f}{ The measurement model} \item{n}{ Number of cases to simulate. If n=0, the population matrix is returned.} \item{uniq}{The uniquenesses if creating a covariance matrix} \item{raw}{if raw=TRUE, raw data are returned as well for n > 0.} \item{items}{TRUE if simulating items, FALSE if simulating scales} \item{low}{Restrict the item difficulties to range from low to high} \item{high}{Restrict the item difficulties to range from low to high} \item{d}{A vector of item difficulties, if NULL will range uniformly from low to high} \item{cat}{Number of categories when creating binary (2) or polytomous items} \item{mu}{A vector of means, defaults to 0} \item{R}{The correlation matrix to reproduce} \item{data}{if TRUE, return the raw data, otherwise return the sample correlation matrix.} \item{scale}{standardize the simulated data?} \item{skew}{Defaults to none (the multivariate normal case. Alternatives take the log, the squareroot, or the absolute value of latent or observed data )} \item{vars}{Apply the skewing or cuts to just these variables. If NULL, to all the variables/} \item{latent}{Should the skewing transforms be applied to the latent variables, or the observed variables?} \item{quant}{Either a single number or a vector length nvar. The data will be dichotomized at quant.} } \details{Given the measurement model, fx and the structure model Phi, the model is f \%*\% Phi \%*\% t(f). Reliability is f \%*\% t(f). \eqn{f \phi f'} and the reliability for each test is the items communality or just the diag of the model. If creating a correlation matrix, (uniq=NULL) then the diagonal is set to 1, otherwise the diagonal is diag(model) + uniq and the resulting structure is a covariance matrix. %Given the model, raw data are generated using the mvnorm function. A special case of a structural model are one factor models such as parallel tests, tau equivalent tests, and congeneric tests. These may be created by letting the structure matrix = 1 and then defining a vector of factor loadings. Alternatively, \code{\link{sim.congeneric}} will do the same. The general case is to use \code{\link{simCor}} aka \code{\link{sim.correlation}} which will create data sampled from a specified correlation matrix for a particular sample size. This follows a procedure described by Kaiser and Dickman (1962). If desired, it will just return the sample correlation matrix. With data=TRUE, it will return the sample data as well. It uses an eigen value decomposition of the original matrix times a matrix of random normal deviates (code adapted from the mvnorm function of Brian Ripley's MASS package). These resulting scores may be transformed using a number of transforms (see the skew option) or made into dichotomous variables (see quant option) for all or a select set (vars option) of the variables. } \value{ \item{model }{The implied population correlation or covariance matrix} \item{reliability }{The population reliability values} \item{r}{The sample correlation or covariance matrix} \item{observed}{If raw=TRUE, a sample data matrix} } \references{ Kaiser, H.F. and Dickman, W. (1962) Sample and population score matrices and sample correlation matrices from an arbitrary population correlation matrix. Psychometrika, 27, 179-182. Revelle, W. (in preparation) An Introduction to Psychometric Theory with applications in R. Springer. at \url{https://personality-project.org/r/book/} } \author{ William Revelle } \seealso{ \code{\link{make.hierarchical}} for another structural model and \code{\link{make.congeneric}} for the one factor case. \code{\link{structure.list}} and \code{\link{structure.list}} for making symbolic structures. } \examples{ #First, create a sem like model with a factor model of x and ys with correlation Phi fx <-matrix(c( .9,.8,.6,rep(0,4),.6,.8,-.7),ncol=2) fy <- matrix(c(.6,.5,.4),ncol=1) rownames(fx) <- c("V","Q","A","nach","Anx") rownames(fy)<- c("gpa","Pre","MA") Phi <-matrix( c(1,0,.7,.0,1,.7,.7,.7,1),ncol=3) #now create this structure gre.gpa <- sim.structural(fx,Phi,fy) print(gre.gpa,2) #correct for attenuation to see structure #the raw correlations are below the diagonal, the adjusted above round(correct.cor(gre.gpa$model,gre.gpa$reliability),2) #These are the population values, # we can also create a correlation matrix sampled from this population GRE.GPA <- sim.structural(fx,Phi,fy,n=250,raw=FALSE) lowerMat(GRE.GPA$r) #or we can show data sampled from such a population GRE.GPA <- sim.structural(fx,Phi,fy,n=250,raw=TRUE) lowerCor(GRE.GPA$observed) congeneric <- sim.structure(f=c(.9,.8,.7,.6)) # a congeneric model congeneric #now take this correlation matrix as a population value and create samples from it example.congeneric <- sim.correlation(congeneric$model,n=200) #create a sample matrix lowerMat(example.congeneric ) #show the correlation matrix #or create another sample and show the data example.congeneric.data <- simCor(congeneric$model,n=200,data=TRUE) describe(example.congeneric.data) lowerCor(example.congeneric.data ) example.skewed <- simCor(congeneric$model,n=200,vars=c(1,2),data=TRUE,skew="log") describe(example.skewed) } \keyword{multivariate } \keyword{datagen} psych/man/cosinor.Rd0000644000176200001440000002721312726336772014142 0ustar liggesusers\name{cosinor} \alias{cosinor} \alias{circadian.phase} \alias{cosinor.plot} \alias{cosinor.period} \alias{circadian.mean} \alias{circadian.sd} \alias{circadian.cor} \alias{circadian.linear.cor} \alias{circadian.stats} \alias{circadian.F} \alias{circadian.reliability} \alias{circular.mean} \alias{circular.cor} \title{Functions for analysis of circadian or diurnal data} \description{Circadian data are periodic with a phase of 24 hours. These functions find the best fitting phase angle (cosinor), the circular mean, circular correlation with circadian data, and the linear by circular correlation} \usage{ cosinor(angle,x=NULL,code=NULL,data=NULL,hours=TRUE,period=24, plot=FALSE,opti=FALSE,na.rm=TRUE) cosinor.plot(angle,x=NULL,data = NULL, IDloc=NULL, ID=NULL,hours=TRUE, period=24, na.rm=TRUE,ylim=NULL,ylab="observed",xlab="Time (double plotted)", main="Cosine fit",add=FALSE,multi=FALSE,typ="l",...) cosinor.period(angle,x=NULL,code=NULL,data=NULL,hours=TRUE,period=seq(23,26,1), plot=FALSE,opti=FALSE,na.rm=TRUE) circadian.phase(angle,x=NULL,code=NULL,data=NULL,hours=TRUE,period=24, plot=FALSE,opti=FALSE,na.rm=TRUE) circadian.mean(angle,data=NULL, hours=TRUE,na.rm=TRUE) circadian.sd(angle,data=NULL,hours=TRUE,na.rm=TRUE) circadian.stats(angle,data=NULL,hours=TRUE,na.rm=TRUE) circadian.F(angle,group,data=NULL,hours=TRUE,na.rm=TRUE) circadian.reliability(angle,x=NULL,code=NULL,data = NULL,min=16, oddeven=FALSE, hours=TRUE,period=24,plot=FALSE,opti=FALSE,na.rm=TRUE) circular.mean(angle,na.rm=TRUE) #angles in radians circadian.cor(angle,data=NULL,hours=TRUE,na.rm=TRUE) #angles in radians circular.cor(angle,na.rm=TRUE) #angles in radians circadian.linear.cor(angle,x=NULL,data=NULL,hours=TRUE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{angle}{A data frame or matrix of observed values with the time of day as the first value (unless specified in code) angle can be specified either as hours or as radians)} \item{code}{A subject identification variable} \item{data}{A matrix or data frame of data. If specified, then angle and code are variable names (or locations). See examples.} \item{group}{If doing comparisons by groups, specify the group code}. \item{min}{The minimum number of observations per subject to use when finding split half reliabilities.} \item{oddeven}{Reliabilities are based upon odd and even items (TRUE) or first vs. last half (FALSE). Default is first and last half.} \item{period}{Although time of day is assumed to have a 24 hour rhythm, other rhythms may be fit. If calling cosinor.period, a range may be specified.} \item{IDloc}{Which column number is the ID field} \item{ID}{What specific subject number should be plotted for one variable} \item{plot}{if TRUE, then plot the first variable (angle)} \item{opti}{opti=TRUE: iterative optimization (slow) or opti=FALSE: linear fitting (fast)} \item{hours}{If TRUE, measures are in 24 hours to the day, otherwise, radians} \item{x}{A set of external variables to correlate with the phase angles} \item{na.rm}{Should missing data be removed?} \item{ylim}{Specify the range of the y axis if the defaults don't work} \item{ylab}{The label of the yaxis} \item{xlab}{Labels for the x axis} \item{main}{the title of the graphic} \item{add}{If doing multiple (spagetti) plots, set add = TRUE for the second and beyond plots} \item{multi}{If doing multiple (spagetti) plots, set multi=TRUE for the first and subsequent plots} \item{typ}{Pass the line type to graphics} \item{...}{any other graphic parameters to pass} } \details{ When data represent angles (such as the hours of peak alertness or peak tension during the day), we need to apply circular statistics rather than the more normal linear statistics (see Jammalamadaka (2006) for a very clear set of examples of circular statistics). The generalization of the mean to circular data is to convert each angle into a vector, average the x and y coordinates, and convert the result back to an angle. A statistic that represents the compactness of the observations is R which is the (normalized) vector length found by adding all of the observations together. This will achieve a maximum value (1) when all the phase angles are the same and a minimum (0) if the phase angles are distributed uniformly around the clock. The generalization of Pearson correlation to circular statistics is straight forward and is implemented in cor.circular in the circular package and in \code{\link{circadian.cor}} here. Just as the Pearson r is a ratio of covariance to the square root of the product of two variances, so is the circular correlation. The circular covariance of two circular vectors is defined as the average product of the sines of the deviations from the circular mean. The variance is thus the average squared sine of the angular deviations from the circular mean. Circular statistics are used for data that vary over a period (e.g., one day) or over directions (e.g., wind direction or bird flight). Jammalamadaka and Lund (2006) give a very good example of the use of circular statistics in calculating wind speed and direction. The code from CircStats and circular was adapted to allow for analysis of data from various studies of mood over the day. Those two packages do not seem to handle missing data, nor do they take matrix input, but rather emphasize single vectors. The cosinor function will either iteratively fit cosines of the angle to the observed data (opti=TRUE) or use the circular by linear regression to estimate the best fitting phase angle. If cos.t <- cos(time) and sin.t = sin(time) (expressed in hours), then beta.c and beta.s may be found by regression and the phase is \eqn{sign(beta.c) * acos(beta.c/\sqrt(beta.c^2 + beta.s^2)) * 12/pi} Simulations (see examples) suggest that with incomplete times, perhaps the optimization procedure yields slightly better fits with the correct phase than does the linear model, but the differences are very small. In the presence of noisey data, these advantages seem to reverse. The recommendation thus seems to be to use the linear model approach (the default). The fit statistic reported for cosinor is the correlation of the data with the model [ cos(time - acrophase) ]. The \code{\link{circadian.reliability}} function splits the data for each subject into a first and second half (by default, or into odd and even items) and then finds the best fitting phase for each half. These are then correlated (using \code{\link{circadian.cor}}) and this correlation is then adjusted for test length using the conventional Spearman-Brown formula. Returned as object in the output are the statistics for the first and second part, as well as an ANOVA to compare the two halves. \code{\link{circular.mean}} and \code{\link{circular.cor}} are just \code{\link{circadian.mean}} and \code{\link{circadian.cor}} but with input given in radians rather than hours. The \code{\link{circadian.linear.cor}} function will correlate a set of circular variables with a set of linear variables. The first (angle) variables are circular, the second (x) set of variables are linear. The \code{\link{circadian.F}} will compare 2 or more groups in terms of their mean position. This is adapted from the equivalent function in the circular pacakge. This is clearly a more powerful test the more each group is compact around its mean (large values of R). } \value{ \item{phase }{The phase angle that best fits the data (expressed in hours if hours=TRUE).} \item{fit}{Value of the correlation of the fit. This is just the correlation of the data with the phase adjusted cosine.} \item{mean.angle}{A vector of mean angles} \item{n,mean,sd}{The appropriate circular statistic.} \item{correl}{A matrix of circular correlations or linear by circular correlations} \item{R}{R is the vector length (0-1) of the mean vector when finding circadian statistics using \code{\link{circadian.stats}} } \item{z,p}{z is the number of observations x R^2. p is the probability of a z.} \item{phase.rel}{The reliability of the phase measures. This is the circular correlation between the two halves adjusted using the Spearman-Brown correction.} \item{fit.rel}{The split half reliability of the fit statistic.} \item{split.F}{Do the two halves differ from each other? One would hope not.} \item{group1,group2}{The statistics from each half} \item{splits}{The individual data from each half.} } \references{ See circular statistics Jammalamadaka, Sreenivasa and Lund, Ulric (2006),The effect of wind direction on ozone levels: a case study, Environmental and Ecological Statistics, 13, 287-298. } \author{William Revelle } \note{These functions have been adapted from the circular package to allow for ease of use with circadian data, particularly for data sets with missing data and multiple variables of interest.} \seealso{See the circular and CircStats packages. } \examples{ time <- seq(1:24) #create a 24 hour time pure <- matrix(time,24,18) colnames(pure) <- paste0("H",1:18) pure <- data.frame(time,cos((pure - col(pure))*pi/12)*3 + 3) #18 different phases but scaled to 0-6 match mood data matplot(pure[-1],type="l",main="Pure circadian arousal rhythms", xlab="time of day",ylab="Arousal") op <- par(mfrow=c(2,2)) cosinor.plot(1,3,pure) cosinor.plot(1,5,pure) cosinor.plot(1,8,pure) cosinor.plot(1,12,pure) p <- cosinor(pure) #find the acrophases (should match the input) #now, test finding the acrophases for different subjects on 3 variables #They should be the first 3, second 3, etc. acrophases of pure pp <- matrix(NA,nrow=6*24,ncol=4) pure <- as.matrix(pure) pp[,1] <- rep(pure[,1],6) pp[1:24,2:4] <- pure[1:24,2:4] pp[25:48,2:4] <- pure[1:24,5:7] *2 #to test different variances pp[49:72,2:4] <- pure[1:24,8:10] *3 pp[73:96,2:4] <- pure[1:24,11:13] pp[97:120,2:4] <- pure[1:24,14:16] pp[121:144,2:4] <- pure[1:24,17:19] pure.df <- data.frame(ID = rep(1:6,each=24),pp) colnames(pure.df) <- c("ID","Time",paste0("V",1:3)) cosinor("Time",3:5,"ID",pure.df) op <- par(mfrow=c(2,2)) cosinor.plot(2,3,pure.df,IDloc=1,ID="1") cosinor.plot(2,3,pure.df,IDloc=1,ID="2") cosinor.plot(2,3,pure.df,IDloc=1,ID="3") cosinor.plot(2,3,pure.df,IDloc=1,ID="4") #now, show those in one panel as spagetti plots op <- par(mfrow=c(1,1)) cosinor.plot(2,3,pure.df,IDloc=1,ID="1",multi=TRUE,ylim=c(0,20),ylab="Modeled") cosinor.plot(2,3,pure.df,IDloc=1,ID="2",multi=TRUE,add=TRUE,lty="dotdash") cosinor.plot(2,3,pure.df,IDloc=1,ID="3",multi=TRUE,add=TRUE,lty="dashed") cosinor.plot(2,3,pure.df,IDloc=1,ID="4",multi=TRUE,add=TRUE,lty="dotted") set.seed(42) #what else? noisy <- pure noisy[,2:19]<- noisy[,2:19] + rnorm(24*18,0,.2) n <- cosinor(time,noisy) #add a bit of noise small.pure <- pure[c(8,11,14,17,20,23),] small.noisy <- noisy[c(8,11,14,17,20,23),] small.time <- c(8,11,14,17,20,23) cosinor.plot(1,3,small.pure,multi=TRUE) cosinor.plot(1,3,small.noisy,multi=TRUE,add=TRUE,lty="dashed") # sp <- cosinor(small.pure) # spo <- cosinor(small.pure,opti=TRUE) #iterative fit # sn <- cosinor(small.noisy) #linear # sno <- cosinor(small.noisy,opti=TRUE) #iterative # sum.df <- data.frame(pure=p,noisy = n, small=sp,small.noise = sn, # small.opt=spo,small.noise.opt=sno) # round(sum.df,2) # round(circadian.cor(sum.df[,c(1,3,5,7,9,11)]),2) #compare alternatives # # #now, lets form three "subjects" and show how the grouping variable works # mixed.df <- rbind(small.pure,small.noisy,noisy) # mixed.df <- data.frame(ID=c(rep(1,6),rep(2,6),rep(3,24)), # time=c(rep(c(8,11,14,17,20,23),2),1:24),mixed.df) # group.df <- cosinor(angle="time",x=2:20,code="ID",data=mixed.df) # round(group.df,2) #compare these values to the sp,sn,and n values done separately } \keyword{ multivariate } psych/man/phi.demo.Rd0000644000176200001440000000345613256544655014174 0ustar liggesusers\name{phi.demo} \alias{phi.demo} \title{ A simple demonstration of the Pearson, phi, and polychoric corelation} \description{A not very interesting demo of what happens if bivariate continuous data are dichotomized. Bascially a demo of r, phi, and polychor. } \usage{ phi.demo(n=1000,r=.6, cuts=c(-2,-1,0,1,2)) } \arguments{ \item{n}{number of cases to simulate} \item{r}{ correlation between latent and observed } \item{cuts}{form dichotomized variables at the value of cuts} } %- maybe also 'usage' for other objects documented here. \details{A demonstration of the problem of different base rates on the phi correlation, and how these are partially solved by using the polychoric correlation. Not one of my more interesting demonstrations. See \url{https://personality-project.org/r/simulating-personality.html} and \url{https://personality-project.org/r/r.datageneration.html} for better demonstrations of data generation. } \value{a matrix of correlations and a graphic plot. The items above the diagonal are the tetrachoric correlations, below the diagonal are raw correlations. } \references{\url{https://personality-project.org/r/simulating-personality.html} and \url{https://personality-project.org/r/r.datageneration.html} for better demonstrations of data generation. } \author{ William Revelle} \seealso{\code{\link{VSS.simulate}},\code{\link{item.sim}}} \examples{ #demo <- phi.demo() #compare the phi (lower off diagonal and polychoric correlations # (upper off diagonal) #show the result from tetrachoric which corrects for zero entries by default #round(demo$tetrachoric$rho,2) #show the result from phi2poly #tetrachorics above the diagonal, phi below the diagonal #round(demo$phis,2) } \keyword{ multivariate }% at least one, from doc/KEYWORDS \keyword{ models }% __ONLY ONE__ keyword per line psych/man/factor.rotate.Rd0000644000176200001440000000430713715123242015221 0ustar liggesusers\name{factor.rotate} \alias{factor.rotate} \title{``Hand" rotate a factor loading matrix } \description{Given a factor or components matrix, it is sometimes useful to do arbitrary rotations of particular pairs of variables. This supplements the much more powerful rotation package GPArotation and is meant for specific requirements to do unusual rotations. } \usage{ factor.rotate(f, angle, col1=1, col2=2,plot=FALSE,...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{f}{original loading matrix or a data frame (can be output from a factor analysis function} \item{angle}{ angle (in degrees!) to rotate } \item{col1}{ column in factor matrix defining the first variable} \item{col2}{ column in factor matrix defining the second variable } \item{plot}{plot the original (unrotated) and rotated factors} \item{...}{parameters to pass to fa.plot} } \details{Partly meant as a demonstration of how rotation works, factor.rotate is useful for those cases that require specific rotations that are not available in more advanced packages such as GPArotation. If the plot option is set to TRUE, then the original axes are shown as dashed lines. The rotation is in degrees counter clockwise. } \value{the resulting rotated matrix of loadings. } \references{ \url{https://personality-project.org/r/book/} \cr } \author{ Maintainer: William Revelle \email{revelle@northwestern.edu } } \note{For a complete rotation package, see GPArotation } \examples{ #using the Harman 24 mental tests, rotate the 2nd and 3rd factors 45 degrees f4<- fa(Harman74.cor$cov,4,rotate="TRUE") f4r45 <- factor.rotate(f4,45,2,3) f4r90 <- factor.rotate(f4r45,45,2,3) print(factor.congruence(f4,f4r45),digits=3) #poor congruence with original print(factor.congruence(f4,f4r90),digits=3) #factor 2 and 3 have been exchanged and 3 flipped #a graphic example data(Harman23.cor) f2 <- fa(Harman23.cor$cov,2,rotate="none") op <- par(mfrow=c(1,2)) cluster.plot(f2,xlim=c(-1,1),ylim=c(-1,1),title="Unrotated ") f2r <- factor.rotate(f2,-33,plot=TRUE,xlim=c(-1,1),ylim=c(-1,1),title="rotated -33 degrees") op <- par(mfrow=c(1,1)) } \keyword{ multivariate }% at least one, from doc/KEYWORDS \keyword{ models }% __ONLY ONE__ keyword per line psych/man/error.bars.by.Rd0000644000176200001440000002161314444325550015143 0ustar liggesusers\name{error.bars.by} \alias{error.bars.by} \title{ Plot means and confidence intervals for multiple groups} \description{One of the many functions in R to plot means and confidence intervals. Meant mainly for demonstration purposes for showing the probabilty of replication from multiple samples. Can also be combined with such functions as boxplot to summarize distributions. Means and standard errors for each group are calculated using \code{\link{describeBy}}. } \usage{ error.bars.by(x,group,data=NULL, by.var=FALSE,x.cat=TRUE,ylab =NULL,xlab=NULL, main=NULL, ylim= NULL, xlim=NULL, eyes=TRUE, alpha=.05,sd=FALSE,labels=NULL,v.labels=NULL,v2.labels=NULL, add.labels=NULL,pos=NULL, arrow.len=.05, min.size=1,add=FALSE, bars=FALSE,within=FALSE, colors=c("black","blue","red"), lty,lines=TRUE, legend=0,pch=16,density=-10,stats=NULL,...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{ A data frame or matrix } \item{group}{A grouping variable} \item{data}{If using formula input, the data file must be specified} \item{by.var}{A different line for each group (default) or each variable} \item{x.cat}{Is the grouping variable categorical (TRUE) or continuous (FALSE} \item{ylab}{y label} \item{xlab}{x label} \item{main}{title for figure} \item{ylim}{if specified, the y limits for the plot, otherwise based upon the data} \item{xlim}{if specified, the x limits for the plot, otherwise based upon the data} \item{eyes}{Should 'cats eyes' be drawn'} \item{alpha}{alpha level of confidence interval. Default is 1- alpha =95\% confidence interval} \item{sd}{sd=TRUE will plot Standard Deviations instead of standard errors} \item{labels}{ X axis label } \item{v.labels}{For a bar plot legend, these are the variable labels, for a line plot, the labels of the grouping variable.} \item{v2.labels}{the names for the 2nd grouping variable, if there is one} \item{add.labels}{if !NULL, then add the v2.labels to the left/right of the lines (add.labels="right")} \item{pos}{where to place text: below, left, above, right} \item{arrow.len}{ How long should the top of the error bars be?} \item{min.size}{Draw error bars for groups > min.size} \item{add}{ add=FALSE, new plot, add=TRUE, just points and error bars} \item{bars}{Draw a barplot with error bars rather than a simple plot of the means} \item{within}{Should the s.e. be corrected by the correlation with the other variables?} \item{colors}{groups will be plotted in different colors (mod n.groups). See the note for how to make them transparent.} \item{lty}{line type may be specified in the case of not plotting by variables} \item{lines}{By default, when plotting different groups, connect the groups with a line of type = lty. If lines is FALSE, then do not connect the groups} \item{legend}{Where should the legend be drawn: 0 (do not draw it), 1= lower right corner, 2 = bottom, 3 ... 8 continue clockwise, 9 is the center} \item{pch}{The first plot symbol to use. Subsequent groups are pch + group} \item{density}{How many lines/inch should fill the cats eyes. If missing, non-transparent colors are used. If negative, transparent colors are used. May be a vector for different values.} \item{stats}{if specified, the means, sd, n and perhaps of se of a data set to be plotted} \item{\dots}{other parameters to pass to the plot function e.g., lty="dashed" to draw dashed lines} } \details{Drawing the mean +/- a confidence interval is a frequently used function when reporting experimental results. By default, the confidence interval is 1.96 standard errors (adjusted for the t-distribution). Improved/modified in August, 2018 to allow formula input (see examples) as well as to more properly handle multiple groups. Following a request for better labeling of the grouping variables, the v.lab option is implemented for line graphs as well as bar graphs. Note that if using multiple grouping variables, the labels are for the variable with the most levels (which should be the first one.) This function was originally just a wrapper for \code{\link{error.bars}} but has been written to allow groups to be organized either as the x axis or as separate lines. If desired, a barplot with error bars can be shown. Many find this type of plot to be uninformative (e.g., https://biostat.mc.vanderbilt.edu/DynamitePlots ) and recommend the more standard dot plot. Note in particular, if choosing to draw barplots, the starting value is 0.0 and setting the ylim parameter can lead to some awkward results if 0 is not included in the ylim range. Did you really mean to draw a bar plot in this case? For up to three groups, the colors are by default "black", "blue" and "red". For more than 3 groups, they are by default rainbow colors with an alpha factor (transparency) of .5. To make colors semitransparent, set the density to a negative number. See the last example. } \value{Graphic output showing the means + x\% confidence intervals for each group. For ci=1.96, and normal data, this will be the 95\% confidence region. For ci=1, the 68\% confidence region. These confidence regions are based upon normal theory and do not take into account any skew in the variables. More accurate confidence intervals could be found by resampling. The results of describeBy are reported invisibly. } \seealso{ See Also as \code{\link{error.crosses}}, \code{\link{histBy}}, \code{\link{scatterHist}}, \code{\link{error.bars}} and \code{\link{error.dots}} } \examples{ data(sat.act) #The generic plot of variables by group error.bars.by( SATV + SATQ ~ gender,data=sat.act) #formula input error.bars.by( SATV + SATQ ~ gender,data=sat.act,v.lab=cs(male,female)) #labels error.bars.by(SATV + SATQ ~ education + gender, data =sat.act) #see below error.bars.by(sat.act[1:4],sat.act$gender,legend=7) #specification of variables error.bars.by(sat.act[1:4],sat.act$gender,legend=7,labels=cs(male,female)) #a bar plot error.bars.by(sat.act[5:6],sat.act$gender,bars=TRUE,labels=c("male","female"), main="SAT V and SAT Q by gender",ylim=c(0,800),colors=c("red","blue"), legend=5,v.labels=c("SATV","SATQ")) #draw a barplot #a bar plot of SAT by age -- not recommended, see the next plot error.bars.by(SATV + SATQ ~ education,data=sat.act,bars=TRUE,xlab="Education", main="95 percent confidence limits of Sat V and Sat Q", ylim=c(0,800), v.labels=c("SATV","SATQ"),colors=c("red","blue") ) #a better graph uses points not bars #use formulat input #plot SAT V and SAT Q by education error.bars.by(SATV + SATQ ~ education,data=sat.act,TRUE, xlab="Education", legend=5,labels=colnames(sat.act[5:6]),ylim=c(525,700), main="self reported SAT scores by education", v.lab =c("HS","in coll", "< 16", "BA/BS", "in Grad", "Grad/Prof")) #make the cats eyes semi-transparent by specifying a negative density error.bars.by(SATV + SATQ ~ education,data=sat.act, xlab="Education", legend=5,labels=c("SATV","SATQ"),ylim=c(525,700), main="self reported SAT scores by education",density=-10, v.lab =c("HS","in coll", "< 16", "BA/BS", "in Grad", "Grad/Prof")) #use labels to specify the 2nd grouping variable, v.lab to specify the first error.bars.by(SATV ~ education + gender,data=sat.act, xlab="Education", legend=5,labels=cs(male,female),ylim=c(525,700), main="self reported SAT scores by education",density=-10, v.lab =c("HS","in coll", "< 16", "BA/BS", "in Grad", "Grad/Prof"), colors=c("red","blue")) #now for a more complicated examples using 25 big 5 items scored into 5 scales #and showing age trends by decade #this shows how to convert many levels of a grouping variable (age) into more manageable levels. data(bfi) #The Big 5 data #first create the keys keys.list <- list(Agree=c(-1,2:5),Conscientious=c(6:8,-9,-10), Extraversion=c(-11,-12,13:15),Neuroticism=c(16:20),Openness = c(21,-22,23,24,-25)) keys <- make.keys(psychTools::bfi,keys.list) #then create the scores for those older than 10 and less than 80 bfis <- subset(psychTools::bfi,((psychTools::bfi$age > 10) & (psychTools::bfi$age < 80))) scores <- scoreItems(keys,bfis,min=1,max=6) #set the right limits for item reversals #now draw the results by age #specify the particular colors to use error.bars.by(scores$scores,round(bfis$age/10)*10,by.var=TRUE, main="BFI age trends",legend=3,labels=colnames(scores$scores), xlab="Age",ylab="Mean item score", colors=cs(green,yellow,black,red,blue), v.labels =cs(10-14,15-24,25-34,35-44,45-54,55-64,65-74)) #show transparency error.bars.by(scores$scores,round(bfis$age/10)*10,by.var=TRUE, main="BFI age trends",legend=3,labels=colnames(scores$scores), xlab="Age",ylab="Mean item score", density=-10, colors=cs(green,yellow,black,red,blue), v.labels =cs(10-14,15-24,25-34,35-44,45-54,55-64,65-74)) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{multivariate} \keyword{ hplot }% __ONLY ONE__ keyword per line psych/man/score.irt.Rd0000644000176200001440000003362014041556052014360 0ustar liggesusers\name{scoreIrt} \alias{scoreIrt} \alias{scoreIrt.1pl} \alias{scoreIrt.2pl} \alias{score.irt} \alias{score.irt.poly} \alias{score.irt.2} \alias{irt.stats.like} \alias{make.irt.stats} \alias{irt.tau} \alias{irt.se} \title{Find Item Response Theory (IRT) based scores for dichotomous or polytomous items} \description{\code{\link{irt.fa}} finds Item Response Theory (IRT) parameters through factor analysis of the tetrachoric or polychoric correlations of dichtomous or polytomous items. \code{\link{scoreIrt}} uses these parameter estimates of discrimination and location to find IRT based scores for the responses. As many factors as found for the correlation matrix will be scored. \code{\link{scoreIrt.2pl}} will score lists of scales. } \usage{ scoreIrt(stats=NULL, items, keys=NULL,cut = 0.3,bounds=c(-4,4),mod="logistic") scoreIrt.1pl(keys.list,items,correct=.5,messages=FALSE,cut=.3,bounds=c(-4,4), mod="logistic") #Rasch like scaling scoreIrt.2pl(itemLists,items,correct=.5,messages=FALSE,cut=.3,bounds=c(-4,4), mod="logistic") #2 pl scoring #the next is an alias for scoreIrt both of which are wrappers for # score.irt.2 and score.irt.poly score.irt(stats=NULL, items, keys=NULL,cut = 0.3,bounds=c(-4,4),mod="logistic") #the higher order call just calls one of the next two #for dichotomous items score.irt.2(stats, items,keys=NULL,cut = 0.3,bounds=c(-4,4),mod="logistic") #for polytomous items score.irt.poly(stats, items, keys=NULL, cut = 0.3,bounds=c(-4,4),mod="logistic") #to create irt like statistics for plotting irt.stats.like(items,stats,keys=NULL,cut=.3) make.irt.stats(difficulty,discrimination) irt.tau(x) #find the tau values for the x object irt.se(stats,scores=0,D=1.702) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{stats}{Output from irt.fa is used for parameter estimates of location and discrimination. Stats may also be the output from a normal factor analysis (fa). If stats is a data.frame of discrimination and thresholds from some other data set, these values will be used. See the last example. } \item{items}{The raw data, may be either dichotomous or polytomous.} \item{itemLists}{a list of items to be factored and scored for each scale, can be a keys.list as used in scoreItems or scoreIrt.1pl} \item{keys.list}{A list of items to be scored with keying direction (see example)} \item{keys}{A keys matrix of which items should be scored for each factor} \item{cut}{Only items with discrimination values > cut will be used for scoring.} \item{x}{The raw data to be used to find the tau parameter in irt.tau} \item{bounds}{The lower and upper estimates for the fitting function} \item{mod}{Should a logistic or normal model be used in estimating the scores?} \item{correct}{What value should be used for continuity correction when finding the tetrachoric or polychoric correlations when using irt.fa} \item{messages}{Should messages be suppressed when running multiple scales?} \item{scores}{A single score or a vector of scores to find standard errors} \item{D}{The scaling function for the test information statistic used in irt.se} \item{difficulty}{The difficulties for each item in a polytomous scoring} \item{discrimination}{The item discrimin} } \details{Although there are more elegant ways of finding subject scores given a set of item locations (difficulties) and discriminations, simply finding that value of theta \eqn{\theta} that best fits the equation \eqn{P(x|\theta) = 1/(1+exp(\beta(\delta - \theta) )} for a score vector X, and location \eqn{\delta} and discrimination \eqn{\beta} provides more information than just total scores. With complete data, total scores and irt estimates are almost perfectly correlated. However, the irt estimates provide much more information in the case of missing data. The bounds parameter sets the lower and upper limits to the estimate. This is relevant for the case of a subject who gives just the lowest score on every item, or just the top score on every item. Formerly (prior to 1.6.12) this was done by estimating these taial scores by finding the probability of missing every item taken, converting this to a quantile score based upon the normal distribution, and then assigning a z value equivalent to 1/2 of that quantile. Similarly, if a person gets all the items they take correct, their score is defined as the quantile of the z equivalent to the probability of getting all of the items correct, and then moving up the distribution half way. If these estimates exceed either the upper or lower bounds, they are adjusted to those boundaries. As of 1.6.9, the procedure is very different. We now assume that all items are bounded with one passed item that is easier than all items given, and one failed item that is harder than any item given. This produces much cleaner results. There are several more elegant packages in R that provide Full Information Maximum Likeliood IRT based estimates. In particular, the MIRT package seems especially good. The ltm package give equivalent estimates to MIRT for dichotomous data but produces unstable estimates for polytomous data and should be avoided. Although the scoreIrt estimates are are not FIML based they seem to correlated with the MIRT estiamtes with values exceeding .99. Indeed, based upon very limited simulations there are some small hints that the solutions match the true score estimates slightly better than do the MIRT estimates. \code{\link{scoreIrt}} seems to do a good job of recovering the basic structure. If trying to use item parameters from a different data set (e.g. some standardization sample), specify the stats as a data frame with the first column representing the item discriminations, and the next columns the item difficulties. See the last example. The two wrapper functions \code{\link{scoreIrt.1pl}} and \code{\link{scoreIrt.2pl}} are very fast and are meant for scoring one or many scales at a time with a one factor model (\code{\link{scoreIrt.2pl}}) or just Rasch like scoring. Just specify the scoring direction for a number of scales (\code{\link{scoreIrt.1pl}}) or just items to score for a number of scales \code{\link{scoreIrt.2pl}}. \code{\link{scoreIrt.2pl}} will then apply \code{\link{irt.fa}} to the items for each scale separately, and then find the 2pl scores. The keys.list is a list of items to score for each scale. Preceding the item name with a negative sign will reverse score that item (relevant for \code{\link{scoreIrt.1pl}}. Alternatively, a keys matrix can be created using \code{\link{make.keys}}. The keys matrix is a matrix of 1s, 0s, and -1s reflecting whether an item should be scored or not scored for a particular factor. See \code{\link{scoreItems}} or \code{\link{make.keys}} for details. The default case is to score all items with absolute discriminations > cut. If one wants to score scales taking advantage of differences in item location but not do a full IRT analysis, then find the item difficulties from the raw data using \code{\link{irt.tau}} or combine this information with a scoring keys matrix (see \code{\link{scoreItems}} and \code{\link{make.keys}} and create quasi-IRT statistics using \code{\link{irt.stats.like}}. This is the equivalent of doing a quasi-Rasch model, in that all items are assumed to be equally discriminating. In this case, tau values may be found first (using \code{\link{irt.tau}} or just found before doing the scoring. This is all done for you inside of \code{\link{scoreIrt.1pl}}. Such irt based scores are particularly useful if finding scales based upon massively missing data (e.g., the SAPA data sets). Even without doing the full irt analysis, we can take into account different item difficulties. David Condon has added a very nice function to do 2PL analysis for a number of scales at one time. \code{\link{scoreIrt.2pl}} takes the raw data file and a list of items to score for each of multiple scales. These are then factored (currently just one factor for each scale) and the loadings and difficulties are used for scoring. There are conventionally two different metrics and models that are used. The logistic metric and model and the normal metric and model. These are chosen using the mod parameter. \code{\link{irt.se}} finds the standard errors for scores with a particular value. These are based upon the information curves calculated by \code{\link{irt.fa}} and are not based upon the particular score of a particular subject. } \value{ \item{scores}{A data frame of theta estimates, total scores based upon raw sums, and estimates of fit.} \item{tau}{Returned by irt.tau: A data frame of the tau values for an object of dichotomous or polytomous items. Found without bothering to find the correlations.} } \references{ Kamata, Akihito and Bauer, Daniel J. (2008) A Note on the Relation Between Factor Analytic and Item Response Theory Models Structural Equation Modeling, 15 (1) 136-153. McDonald, Roderick P. (1999) Test theory: A unified treatment. L. Erlbaum Associates. Revelle, William. (in prep) An introduction to psychometric theory with applications in R. Springer. Working draft available at \url{https://personality-project.org/r/book/} } \author{William Revelle, David Condon } \note{ It is very important to note that when using \code{\link{irt.fa}} to find the discriminations, to set the sort option to be FALSE. This is now the default. Otherwise, the discriminations will not match the item order. Always under development. Suggestions for improvement are most appreciated. scoreIrt is just a wrapper to score.irt.poly and score.irt.2. The previous version had score.irt which is now deprecated as I try to move to camelCase. scoreIrt.2pl is a wrapper for irt.fa and scoreIrt. It was originally developed by David Condon. } \seealso{ \code{\link{irt.fa}} for finding the parameters. For more conventional scoring algorithms see \code{\link{scoreItems}}. \code{\link{irt.responses}} will plot the empirical response patterns for the alternative response choices for multiple choice items. For more conventional IRT estimations, see the ltm package. } \examples{ \donttest{ #not run in the interest of time, but worth doing d9 <- sim.irt(9,1000,-2.5,2.5,mod="normal") #dichotomous items test <- irt.fa(d9$items) scores <- scoreIrt(test,d9$items) scores.df <- data.frame(scores,true=d9$theta) #combine the estimates with the true thetas. pairs.panels(scores.df,pch=".", main="Comparing IRT and classical with complete data") #now show how to do this with a quasi-Rasch model tau <- irt.tau(d9$items) scores.rasch <- scoreIrt(tau,d9$items,key=rep(1,9)) scores.dfr<- data.frame(scores.df,scores.rasch) #almost identical to 2PL model! pairs.panels(scores.dfr) #with all the data, why bother ? #now delete some of the data d9$items[1:333,1:3] <- NA d9$items[334:666,4:6] <- NA d9$items[667:1000,7:9] <- NA scores <- scoreIrt(test,d9$items) scores.df <- data.frame(scores,true=d9$theta) #combine the estimates with the true thetas. pairs.panels(scores.df, pch=".", main="Comparing IRT and classical with random missing data") #with missing data, the theta estimates are noticably better. #now show how to do this with a quasi-Rasch model tau <- irt.tau(d9$items) scores.rasch <- scoreIrt(tau,d9$items,key=rep(1,9)) scores.dfr <- data.frame(scores.df,rasch = scores.rasch) pairs.panels(scores.dfr) #rasch is actually better! v9 <- sim.irt(9,1000,-2.,2.,mod="normal") #dichotomous items items <- v9$items test <- irt.fa(items) total <- rowSums(items) ord <- order(total) items <- items[ord,] #now delete some of the data - note that they are ordered by score items[1:333,5:9] <- NA items[334:666,3:7] <- NA items[667:1000,1:4] <- NA items[990:995,1:9] <- NA #the case of terrible data items[996:998,] <- 0 #all wrong items[999:1000] <- 1 #all right scores <- scoreIrt(test,items) unitweighted <- scoreIrt(items=items,keys=rep(1,9)) #each item has a discrimination of 1 #combine the estimates with the true thetas. scores.df <- data.frame(v9$theta[ord],scores,unitweighted) colnames(scores.df) <- c("True theta","irt theta","total","fit","rasch","total","fit") pairs.panels(scores.df,pch=".",main="Comparing IRT and classical with missing data") #with missing data, the theta estimates are noticably better estimates #of the generating theta than using the empirically derived factor loading weights #now show the ability to score multiple scales using keys ab.tau <- irt.tau(psychTools::ability) #first find the tau values ab.keys <- make.keys(psychTools::ability,list(g=1:16,reason=1:4, letter=5:8,matrix=9:12,rotate=13:16)) #ab.scores <- scoreIrt(stats=ab.tau, items = psychTools::ability, keys = ab.keys) #and now do it for polytomous items using 2pl bfi.scores <- scoreIrt.2pl(bfi.keys,bfi[1:25]) #compare with classical unit weighting by using scoreItems #not run in the interests of time #bfi.unit <- scoreItems(psychTools::bfi.keys,psychTools::bfi[1:25]) #bfi.df <- data.frame(bfi.scores,bfi.unit$scores) #pairs.panels(bfi.df,pch=".") bfi.irt <- scoreIrt(items=bfi[16:20]) #find irt based N scores #Specify item difficulties and discriminations from a different data set. stats <- structure(list(MR1 = c(1.4, 1.3, 1.3, 0.8, 0.7), difficulty.1 = c(-1.2, -2, -1.5, -1.2, -0.9), difficulty.2 = c(-0.1, -0.8, -0.4, -0.3, -0.1), difficulty.3 = c(0.6, -0.2, 0.2, 0.2, 0.3), difficulty.4 = c(1.5, 0.9, 1.1, 1, 1), difficulty.5 = c(2.5, 2.1, 2.2, 1.7, 1.6)), row.names = c("N1", "N2", "N3", "N4", "N5"), class = "data.frame") stats #show them bfi.new <-scoreIrt(stats,bfi[16:20]) bfi.irt <- scoreIrt(items=bfi[16:20]) cor2(bfi.new,bfi.irt) newstats <- stats newstats[2:6] <-stats[2:6 ] + 1 #change the difficulties bfi.harder <- scoreIrt(newstats,bfi[16:20]) pooled <- cbind(bfi.irt,bfi.new,bfi.harder) describe(pooled) #note that the mean scores have changed lowerCor(pooled) #and although the unit weighted scale are identical, # the irt scales differ by changing the difficulties } } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{ multivariate } \keyword{ models} psych/man/esem.Rd0000644000176200001440000002676214540330063013406 0ustar liggesusers\name{esem} \alias{esem} \alias{esemDiagram} \alias{esem.diagram} \alias{interbattery} \alias{cancorDiagram} \title{Perform and Exploratory Structural Equation Model (ESEM) by using factor extension techniques} \description{Structural Equation Modeling (SEM) is a powerful tool for confirming multivariate structures and is well done by the lavaan, sem, or OpenMx packages. Because they are confirmatory, SEM models test specific models. Exploratory Structural Equation Modeling (ESEM), on the other hand, takes a more exploratory approach. By using factor extension, it is possible to extend the factors of one set of variables (X) into the variable space of another set (Y). Using this technique, it is then possible to estimate the correlations between the two sets of latent variables, much the way normal SEM would do. Based upon exploratory factor analysis (EFA) this approach provides a quick and easy approach to do exploratory structural equation modeling. } \usage{ esem(r, varsX, varsY, nfX = 1, nfY = 1, n.obs = NULL, fm = "minres", rotate = "oblimin", rotateY="oblimin", plot = TRUE, cor = "cor", use = "pairwise",weight=NULL, ...) esemDiagram(esem=NULL,labels=NULL,cut=.3,errors=FALSE,simple=TRUE, regression=FALSE,lr=TRUE, digits=1,e.size=.1,adj=2, main="Exploratory Structural Model", ...) esem.diagram(esem=NULL,labels=NULL,cut=.3,errors=FALSE,simple=TRUE, regression=FALSE,lr=TRUE, digits=1,e.size=.1,adj=2, main="Exploratory Structural Model", ...) #deprecated cancorDiagram(fit, cut=.1,digits=2, nfX = NULL, nfY = NULL,simple=FALSE,e.size=.1, main="Canonical Correlation",...) interbattery(r, varsX, varsY, nfX = 1, nfY = 1, n.obs = NULL,cor = "cor", use = "pairwise",weight=NULL) } \arguments{ \item{r}{A correlation matrix or a raw data matrix suitable for factor analysis} \item{varsX}{The variables defining set X} \item{varsY}{The variables defining set Y} \item{nfX}{The number of factors to extract for the X variables} \item{nfY}{The number of factors to extract for the Y variables} \item{n.obs}{Number of observations (needed for eBIC and chi square), can be ignored.} \item{fm}{The factor method to use, e.g., "minres", "mle" etc. (see fa for details)} \item{rotate}{Which rotation to use. (see fa for details)} \item{rotateY}{Which rotatation to use for the Y variables.} \item{plot}{If TRUE, draw the esemDiagram} \item{cor}{What options for to use for correlations (see fa for details)} \item{use}{"pairwise" for pairwise complete data, for other options see cor} \item{weight}{Weights to apply to cases when finding wt.cov} \item{\dots}{other parameters to pass to fa or to esemDiagram functions.} \item{esem}{The object returned from esem and passed to esemDiagram} \item{fit}{The object returned by lmCor with canonical variates} \item{labels}{ Variable labels } \item{cut}{ Loadings with abs(loading) > cut will be shown } \item{simple}{Only the biggest loading per item is shown} \item{errors}{include error estimates (as arrows)} \item{e.size}{size of ellipses (adjusted by the number of variables)} \item{digits}{Round coefficient to digits} \item{adj}{loadings are adjusted by factor number mod adj to decrease likelihood of overlap} \item{main}{ Graphic title, defaults to "Exploratory Structural Model" } \item{lr}{draw the graphic left to right (TRUE) or top to bottom (FALSE)} \item{regression}{Not yet implemented} } \details{ Factor analysis as implemented in \code{\link{fa}} attempts to summarize the covariance (correlational) structure of a set of variables with a small set of latent variables or ``factors". This solution may be `extended' into a larger space with more variables without changing the original solution (see \code{\link{fa.extension}}. Similarly, the factors of a second set of variables (the Y set) may be extended into the original (X ) set. Doing so allows two independent measurement models, a measurement model for X and a measurement model for Y. These two sets of latent variables may then be correlated for an Exploratory Structural Equation Model. (This is exploratory because it is based upon exploratory factor analysis (EFA) rather than a confirmatory factor model (CFA) using more traditional Structural Equation Modeling packages such as sem, lavaan, or Mx.) Although the output seems very similar to that of a normal EFA using \code{\link{fa}}, it is actually two independent factor analyses (of the X and the Y sets) that are then mutually extended into each other. That is, the loadings and structure matrices from sets X and Y are merely combined, and the correlations between the two sets of factors are found. Interbattery factor analysis was developed by Tucker (1958) as a way of comparing the factors in common to two batteries of tests. (Currently under development and not yet complete). Using some straight forward linear algebra It is easy to find the factors of the intercorrelations between the two sets of variables. This does not require estimating communalities and is highly related to the procedures of canonical correlation. The difference between the esem and the interbattery approach is that the first factors the X set and then relates those factors to factors of the Y set. Interbattery factor analysis, on the other hand, tries to find one set of factors that links both sets but is still distinct from factoring both sets together. } \value{ \item{communality}{The amount of variance in each of the X and Y variables accounted for by the total model.} \item{sumsq}{The amount of variance accounted for by each factor -- independent of the other factors.} \item{dof}{Degrees of freedom of the model. } \item{esem.dof}{Alternatively consider the df1 for the X model and df2 for the Y model. esem.dof = df1 + df2} \item{null.dof}{Degrees of freedom of the null model (the correlation matrix)} \item{ENull}{chi square of the null model} \item{chi}{chi square of the model. This is found by examining the size of the residuals compared to their standard error.} \item{rms}{The root mean square of the residuals.} \item{nh}{Harmonic sample size if using min.chi for factor extraction.} \item{EPVAL}{Probability of the Emprical Chi Square given the hypothesis of an identity matrix.} \item{crms}{Adjusted root mean square residual} \item{EBIC}{When normal theory fails (e.g., in the case of non-positive definite matrices), it useful to examine the empirically derived EBIC based upon the empirical \eqn{\chi^2}{chi^2} - 2 df. } \item{ESABIC}{Sample size adjusted empirical BIC} \item{fit}{sum of squared residuals versus sum of squared original values} \item{fit.off}{fit applied to the off diagonal elements} \item{sd}{standard deviation of the residuals} \item{factors}{Number of factors extracted} \item{complexity}{Item complexity} \item{n.obs}{Number of total observations} \item{loadings}{The factor pattern matrix for the combined X and Y factors} \item{Structure}{The factor structure matrix for the combined X and Y factors} \item{loadsX}{Just the X set of loadings (pattern) without the extension variables.} \item{loadsY}{Just the Y set of loadings (pattern) without the extension variables.} \item{PhiX}{The correlations of the X factors} \item{PhiY}{the correlations of the Y factors} \item{Phi}{the correlations of the X and Y factors within the selves and across sets.} \item{fm}{The factor method used} \item{fx}{The complete factor analysis output for the X set} \item{fy}{The complete factor analysis output for the Y set} \item{residual}{The residual correlation matrix (R - model). May be examined by a call to residual().} \item{Call}{Echo back the original call to the function.} \item{model}{model code for SEM and for lavaan to do subsequent confirmatory modeling} } \references{ Revelle, William. (in prep) An introduction to psychometric theory with applications in R. Springer. Working draft available at \url{https://personality-project.org/r/book/} Tucker, Ledyard (1958) An inter-battery method of factor analysis, Psychometrika, 23, 111-136. } \author{William Revelle} \note{Developed September, 2016, revised December, 2018 to produce code for lavaan and sem from the \code{\link{esem}} and \code{\link{esemDiagram}} functions. Suggestions or comments are most welcome. This is clearly an exploratory approach and the degrees of freedom for the model are unclear. In December 2023, I added the canCorDiagram function to help understand canonical correlations. } \seealso{ \code{\link{principal}} for principal components analysis (PCA). PCA will give very similar solutions to factor analysis when there are many variables. The differences become more salient as the number variables decrease. The PCA and FA models are actually very different and should not be confused. One is a model of the observed variables, the other is a model of latent variables. \code{\link{lmCor}} which does a canonical correlation between the X and Y sets of variables. \code{\link{irt.fa}} for Item Response Theory analyses using factor analysis, using the two parameter IRT equivalent of loadings and difficulties. \code{\link{VSS}} will produce the Very Simple Structure (VSS) and MAP criteria for the number of factors, \code{\link{nfactors}} to compare many different factor criteria. \code{\link{ICLUST}} will do a hierarchical cluster analysis alternative to factor analysis or principal components analysis. \code{\link{predict.psych}} to find predicted scores based upon new data, \code{\link{fa.extension}} to extend the factor solution to new variables, \code{\link{omega}} for hierarchical factor analysis with one general factor. \code{\link{fa.multi}} for hierarchical factor analysis with an arbitrary number of higher order factors. \code{\link{faRegression}} to do multiple regression from factor analysis solutions. \code{\link{fa.sort}} will sort the factor loadings into echelon form. \code{\link{fa.organize}} will reorganize the factor pattern matrix into any arbitrary order of factors and items. \code{\link{KMO}} and \code{\link{cortest.bartlett}} for various tests that some people like. \code{\link{factor2cluster}} will prepare unit weighted scoring keys of the factors that can be used with \code{\link{scoreItems}}. \code{\link{fa.lookup}} will print the factor analysis loadings matrix along with the item ``content" taken from a dictionary of items. This is useful when examining the meaning of the factors. \code{\link{anova.psych}} allows for testing the difference between two (presumably nested) factor models . } \examples{ #make up a sem like problem using sim.structure fx <-matrix(c( .9,.8,.6,rep(0,4),.6,.8,-.7),ncol=2) fy <- matrix(c(.6,.5,.4),ncol=1) rownames(fx) <- c("V","Q","A","nach","Anx") rownames(fy)<- c("gpa","Pre","MA") Phi <-matrix( c(1,0,.7,.0,1,.7,.7,.7,1),ncol=3) gre.gpa <- sim.structural(fx,Phi,fy) print(gre.gpa) #now esem it: example <- esem(gre.gpa$model,varsX=1:5,varsY=6:8,nfX=2,nfY=1,n.obs=1000,plot=FALSE) example esemDiagram(example,simple=FALSE) #compare this to the canonical solution mod <- lmCor(y=6:8, x =1:5, data=gre.gpa$model, plot=FALSE) cancorDiagram(mod) #does not work because of imaginary roots #compare two alternative solutions to the first 2 factors of the neo. #solution 1 is the normal 2 factor solution. #solution 2 is an esem with 1 factor for the first 6 variables, and 1 for the second 6. f2 <- fa(psychTools::neo[1:12,1:12],2) es2 <- esem(psychTools::neo,1:6,7:12,1,1) summary(f2) summary(es2) fa.congruence(f2,es2) interbattery(Thurstone.9,1:4,5:9,2,2) #compare to the solution of Tucker. We are not there yet. } \keyword{ multivariate } \keyword{ models}psych/man/factor.residuals.Rd0000644000176200001440000000323512262077524015724 0ustar liggesusers\name{factor.residuals} \alias{factor.residuals} \title{ R* = R- F F' } \description{The basic factor or principal components model is that a correlation or covariance matrix may be reproduced by the product of a factor loading matrix times its transpose. Find the residuals of the original minus the reproduced matrix. Used by \code{\link{factor.fit}}, \code{\link{VSS}}, \code{\link{ICLUST}}, etc. } \usage{ factor.residuals(r, f) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{r}{ A correlation matrix } \item{f}{ A factor model matrix or a list of class loadings} } \details{The basic factor equation is \eqn{_nR_n \approx _{n}F_{kk}F_n'+ U^2}{nRn = nFk kFn' + U2}. Residuals are just R* = R - F'F. The residuals should be (but in practice probably rarely are) examined to understand the adequacy of the factor analysis. When doing Factor analysis or Principal Components analysis, one usually continues to extract factors/components until the residuals do not differ from those expected from a random matrix. } \value{ rstar is the residual correlation matrix. } \author{ Maintainer: William Revelle } \seealso{\code{\link{fa}}, \code{\link{principal}}, \code{\link{VSS}}, \code{\link{ICLUST}}} \examples{ fa2 <- fa(Harman74.cor$cov,2,rotate=TRUE) fa2resid <- factor.residuals(Harman74.cor$cov,fa2) fa2resid[1:4,1:4] #residuals with two factors extracted fa4 <- fa(Harman74.cor$cov,4,rotate=TRUE) fa4resid <- factor.residuals(Harman74.cor$cov,fa4) fa4resid[1:4,1:4] #residuals with 4 factors extracted } \keyword{ multivariate }% at least one, from doc/KEYWORDS \keyword{ models }% __ONLY ONE__ keyword per line psych/man/fa.parallel.Rd0000644000176200001440000003466714551340161014643 0ustar liggesusers\name{fa.parallel} \alias{fa.parallel} \alias{fa.parallel.poly} \alias{plot.poly.parallel} \title{Scree plots of data or correlation matrix compared to random ``parallel" matrices } \description{One way to determine the number of factors or components in a data matrix or a correlation matrix is to examine the ``scree" plot of the successive eigenvalues. Sharp breaks in the plot suggest the appropriate number of components or factors to extract. ``Parallel" analysis is an alternative technique that compares the scree of factors of the observed data with that of a random data matrix of the same size as the original. This may be done for continuous , dichotomous, or polytomous data using Pearson, tetrachoric or polychoric correlations. } \usage{ fa.parallel(x,n.obs=NULL,fm="minres",fa="both",nfactors=1, main="Parallel Analysis Scree Plots", n.iter=20,error.bars=FALSE,se.bars=FALSE,SMC=FALSE,ylabel=NULL,show.legend=TRUE, sim=TRUE,quant=.95,cor="cor",use="pairwise",plot=TRUE,correct=.5,sqrt=FALSE) fa.parallel.poly(x ,n.iter=10,SMC=TRUE, fm = "minres",correct=TRUE,sim=FALSE, fa="both",global=TRUE) #deprecated \method{plot}{poly.parallel}(x,show.legend=TRUE,fa="both",...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{ A data.frame or data matrix of scores. If the matrix is square, it is assumed to be a correlation matrix. Otherwise, correlations (with pairwise deletion) will be found. } \item{n.obs}{n.obs=0 implies a data matrix/data.frame. Otherwise, how many cases were used to find the correlations. } \item{fm}{What factor method to use. (minres, ml, uls, wls, gls, pa) See \code{\link{fa}} for details.} \item{fa}{show the eigen values for a principal components (fa="pc") or a principal axis factor analysis (fa="fa") or both principal components and principal factors (fa="both")} \item{nfactors}{The number of factors to extract when estimating the eigen values. Defaults to 1, which was the prior value used.} \item{main}{ a title for the analysis } \item{n.iter}{Number of simulated analyses to perform} \item{use}{How to treat missing data, use="pairwise" is the default". See cor for other options.} \item{cor}{How to find the correlations: "cor" is Pearson", "cov" is covariance, "tet" is tetrachoric, "poly" is polychoric, "mixed" uses mixed cor for a mixture of tetrachorics, polychorics, Pearsons, biserials, and polyserials, Yuleb is Yulebonett, Yuleq and YuleY are the obvious Yule coefficients as appropriate. This matches the call to \code{\link{fa}}. } \item{correct}{For tetrachoric correlations, should a correction for continuity be applied. (See \code{\link{tetrachoric}}.) If set to 0, then no correction is applied, otherwise, the default is to add .5 observations to the cell.} \item{sim}{For continuous data, the default is to resample as well as to generate random normal data. If sim=FALSE, then just show the resampled results. These two results are very similar. This does not make sense in the case of correlation matrix, in which case resampling is impossible. In the case of polychoric or tetrachoric data, in addition to randomizing the real data, should we compare the solution to random simulated data. This will double the processing time, but will basically show the same result.} \item{error.bars}{Should error.bars be plotted (default = FALSE)} \item{se.bars}{Should the error bars be standard errors (se.bars=TRUE) or 1 standard deviation (se.bars=FALSE, the default). With many iterations, the standard errors are very small and some prefer to see the broader range. The default has been changed in version 1.7.8 to be se.bars=FALSE to more properly show the range.} \item{SMC}{SMC=TRUE finds eigen values after estimating communalities by using SMCs. smc = FALSE finds eigen values after estimating communalities with the first factor.} \item{ylabel}{Label for the y axis -- defaults to ``eigen values of factors and components", can be made empty to show many graphs} \item{show.legend}{the default is to have a legend. For multiple panel graphs, it is better to not show the legend} \item{quant}{if nothing is specified, the empirical eigen values are compared to the mean of the resampled or simulated eigen values. If a value (e.g., quant=.95) is specified, then the eigen values are compared against the matching quantile of the simulated data. Clearly the larger the value of quant, the few factors/components that will be identified. The default is to use quant=.95.} \item{global}{If doing polychoric analyses (fa.parallel.poly) and the number of alternatives differ across items, it is necessary to turn off the global option. fa.parallel.poly is deprecated but this choice is still relevant.} \item{...}{additional plotting parameters, for plot.poly.parallel} \item{plot}{By default, fa.parallel draws the eigen value plots. If FALSE, suppresses the graphic output} \item{sqrt}{If TRUE, take the squareroot of the eigen values to more understandably show the scale. Note, although providing more useful graphics the results will be the same. See DelGuidice, 2022} } \details{ Cattell's ``scree" test is one of most simple tests for the number of factors problem. Horn's (1965) ``parallel" analysis is an equally compelling procedure. Other procedures for determining the most optimal number of factors include finding the Very Simple Structure (VSS) criterion (\code{\link{VSS}} ) and Velicer's \code{\link{MAP}} procedure (included in \code{\link{VSS}}). Both the VSS and the MAP criteria are included in the \code{\link{nfactors}} function which also reports the mean item complexity and the BIC for each of multiple solutions. fa.parallel plots the eigen values for a principal components and the factor solution (minres by default) and does the same for random matrices of the same size as the original data matrix. For raw data, the random matrices are 1) a matrix of univariate normal data and 2) random samples (randomized across rows) of the original data. \code{\link{fa.parallel}} with the cor=poly option will do what \code{\link{fa.parallel.poly}} explicitly does: parallel analysis for polychoric and tetrachoric factors. If the data are dichotomous, \code{\link{fa.parallel.poly}} will find tetrachoric correlations for the real and simulated data, otherwise, if the number of categories is less than 10, it will find polychoric correlations. Note that fa.parallel.poly is slower than fa.parallel because of the complexity of calculating the tetrachoric/polychoric correlations. The functionality of \code{\link{fa.parallel.poly}} is included in \code{\link{fa.parallel}} with cor=poly option (etc.) option but the older \code{\link{fa.parallel.poly}} is kept for those who call it directly. That is, \code{\link{fa.parallel}} now will do tetrachorics or polychorics directly if the cor option is set to "tet" or "poly". As with \code{\link{fa.parallel.poly}} this will take longer. The means of (ntrials) random solutions are shown. Error bars are usually very small and are suppressed by default but can be shown if requested. If the sim option is set to TRUE (default), then parallel analyses are done on resampled data as well as random normal data. In the interests of speed, the parallel analyses are done just on resampled data if sim=FALSE. Both procedures tend to agree. As of version 1.5.4, I added the ability to specify the quantile of the simulated/resampled data, and to plot standard deviations or standard errors. By default, this is set to the 95th percentile. Alternative ways to estimate the number of factors problem are discussed in the Very Simple Structure (Revelle and Rocklin, 1979) documentation (\code{\link{VSS}}) and include Wayne Velicer's \code{\link{MAP}} algorithm (Veicer, 1976). Parallel analysis for factors is actually harder than it seems, for the question is what are the appropriate communalities to use. If communalities are estimated by the Squared Multiple Correlation (SMC) \code{\link{smc}}, then the eigen values of the original data will reflect major as well as minor factors (see \code{\link{sim.minor}} to simulate such data). Random data will not, of course, have any structure and thus the number of factors will tend to be biased upwards by the presence of the minor factors. By default, fa.parallel estimates the communalities based upon a one factor minres solution. Although this will underestimate the communalities, it does seem to lead to better solutions on simulated or real (e.g., the \code{\link[psychTools]{bfi}} or Harman74) data sets. For comparability with other algorithms (e.g, the paran function in the paran package), setting smc=TRUE will use smcs as estimates of communalities. This will tend towards identifying more factors than the default option. Yet another option (suggested by Florian Scharf) is to estimate the eigen values based upon a particular factor model (e.g., specify nfactors > 1). Printing the results will show the eigen values of the original data that are greater than simulated values. A sad observation about parallel analysis is that it is sensitive to sample size. That is, for large data sets, the eigen values of random data are very close to 1. This will lead to different estimates of the number of factors as a function of sample size. Consider factor structure of the bfi data set (the first 25 items are meant to represent a five factor model). For samples of 200 or less, parallel analysis suggests 5 factors, but for 1000 or more, six factors and components are indicated. This is not due to an instability of the eigen values of the real data, but rather the closer approximation to 1 of the random data as n increases. Although with nfactors=1, 6 factors are suggested, when specifying nfactors =5, parallel analysis of the bfi suggests 12 factors should be extracted! When simulating dichotomous data in fa.parallel.poly, the simulated data have the same difficulties as the original data. This functionally means that the simulated and the resampled results will be very similar. Note that fa.parallel.poly has functionally been replaced with fa.parallel with the cor="poly" option. As with many psych functions, fa.parallel has been changed to allow for multicore processing. For running a large number of iterations, it is obviously faster to increase the number of cores to the maximum possible (using the options("mc.cores=n) command where n is determined from detectCores(). } \value{ A plot of the eigen values for the original data, ntrials of resampling of the original data, and of a equivalent size matrix of random normal deviates. If the data are a correlation matrix, specify the number of observations. Also returned (invisibly) are: \item{fa.values}{The eigen values of the factor model for the real data.} \item{fa.sim}{The descriptive statistics of the simulated factor models.} \item{pc.values}{The eigen values of a principal components of the real data.} \item{pc.sim}{The descriptive statistics of the simulated principal components analysis.} \item{nfact}{Number of factors with eigen values > eigen values of random data} \item{ncomp}{Number of components with eigen values > eigen values of random data} \item{values}{The simulated values for all simulated trials} } \note{Although by default the test is applied to the 95th percentile eigen values, this can be modified by setting the quant parameter to any particular quantile. The actual simulated data are also returned (invisibly) in the value object. Thus, it is possible to do descriptive statistics on those to choose a preferred comparison. See the last example (not run) The simulated and resampled data tend to be very similar, so for a slightly cleaner figure, set sim=FALSE. For relatively small samples with dichotomous data and cor="tet" if some cells are empty, or if the resampled matrices are not positive semi-definite, warnings are issued. this leads to serious problems if using multi.cores (the default if using a Mac). The solution seems to be to not use multi.cores (e.g., options(mc.cores =1) ) } \references{ Del Giudice, M. (2022, October 21). The Square-Root Scree Plot: A Simple Improvement to a Classic Display. doi: 10.31234/osf.io/axubd Floyd, Frank J. and Widaman, Keith. F (1995) Factor analysis in the development and refinement of clinical assessment instruments. Psychological Assessment, 7(3):286-299, 1995. Horn, John (1965) A rationale and test for the number of factors in factor analysis. Psychometrika, 30, 179-185. Humphreys, Lloyd G. and Montanelli, Richard G. (1975), An investigation of the parallel analysis criterion for determining the number of common factors. Multivariate Behavioral Research, 10, 193-205. Revelle, William and Rocklin, Tom (1979) Very simple structure - alternative procedure for estimating the optimal number of interpretable factors. Multivariate Behavioral Research, 14(4):403-414. Velicer, Wayne. (1976) Determining the number of components from the matrix of partial correlations. Psychometrika, 41(3):321-327, 1976. } \author{ William Revelle } \note{Gagan Atreya reports a problem with the multi-core implementation of fa.parallel when running Microsoft Open R. This can be resolved by setMKLthreads(1) to set the number of threads to 1. } \seealso{ \code{\link{fa}}, \code{\link{nfactors}}, \code{\link{VSS}}, \code{\link{VSS.plot}}, \code{\link{VSS.parallel}}, \code{\link{sim.minor}}} \examples{ #test.data <- Harman74.cor$cov #The 24 variable Holzinger - Harman problem #fa.parallel(test.data,n.obs=145) fa.parallel(Thurstone,n.obs=213) #the 9 variable Thurstone problem #set.seed(123) #minor <- sim.minor(24,4,400) #4 large and 12 minor factors #ffa.parallel(minor$observed) #shows 5 factors and 4 components -- compare with #fa.parallel(minor$observed,SMC=FALSE) #which shows 6 and 4 components factors #a demonstration of parallel analysis of a dichotomous variable #fp <- fa.parallel(psychTools::ability) #use the default Pearson correlation #fpt <- fa.parallel(psychTools::ability,cor="tet") #do a tetrachoric correlation #fpt <- fa.parallel(psychTools::ability,cor="tet",quant=.95) #do a tetrachoric correlation and #use the 95th percentile of the simulated results #apply(fp$values,2,function(x) quantile(x,.95)) #look at the 95th percentile of values #apply(fpt$values,2,function(x) quantile(x,.95)) #look at the 95th percentile of values #describe(fpt$values) #look at all the statistics of the simulated values } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{multivariate} psych/man/scrub.Rd0000644000176200001440000000536514152314152013567 0ustar liggesusers\name{scrub} \alias{scrub} \title{A utility for basic data cleaning and recoding. Changes values outside of minimum and maximum limits to NA. } \description{A tedious part of data analysis is addressing the problem of miscoded data that need to be converted to NA or some other value. For a given data.frame or matrix, scrub will set all values of columns from=from to to=to that are less than a set (vector) of min values or more than a vector of max values to NA. Can also be used to do basic recoding of data for all values=isvalue to newvalue. Will also recode continuus variables into fewer categories. Will convert Nan, -Inf and Inf to NA The length of the where, isvalue, and newvalues must either match, or be 1. } \usage{ scrub(x, where, min, max,isvalue,newvalue, cuts=NULL) } \arguments{ \item{x}{a data frame or matrix} \item{where}{The variables to examine. (Can be by name or by column number)} \item{min}{a vector of minimum values that are acceptable} \item{max}{a vector of maximum values that are acceptable} \item{isvalue}{a vector of values to be converted to newvalue (one per variable)} \item{newvalue}{a vector of values to replace those that match isvalue} \item{cuts}{The lower,and upper boundaries for recoding }} \details{Solves a tedious problem that can be done directly but that is sometimes awkward. Will either replace specified values with NA or will recode to values within a range. } \value{ The corrected data frame.} \author{William Revelle } \seealso{ \code{\link{reverse.code}}, \code{\link{rescale}} for other simple utilities. } \examples{ data(attitude) x <- scrub(attitude,isvalue=55) #make all occurrences of 55 NA x1 <- scrub(attitude, where=c(4,5,6), isvalue =c(30,40,50), newvalue = c(930,940,950)) #will do this for the 4th, 5th, and 6th variables x2 <- scrub(attitude, where=c(4,4,4), isvalue =c(30,40,50), newvalue = c(930,940,950)) #will just do it for the 4th column new <- scrub(attitude,1:3,cuts= c(10,40,50,60,100)) #change many values to fewer #get rid of a complicated set of cases and replace with missing values y <- scrub(attitude,where=2:4,min=c(20,30,40),max= c(120,110,100),isvalue= c(32,43,54)) y1 <- scrub(attitude,where="learning",isvalue=55,newvalue=999) #change a column by name y2 <- scrub(attitude,where="learning",min=45,newvalue=999) #change a column by name y3 <- scrub(attitude,where="learning",isvalue=c(45,48), newvalue=999) #change a column by name look for multiple values in that column y4 <- scrub(attitude,where="learning",isvalue=c(45,48), newvalue= c(999,-999)) #change values in one column to one of two different things } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{multivariate } %code is in reverse.code psych/man/schmid.Rd0000644000176200001440000001077214262120556013723 0ustar liggesusers\name{schmid} \alias{schmid} \title{Apply the Schmid Leiman transformation to a correlation matrix} \description{One way to find omega is to do a factor analysis of the original data set, rotate the factors obliquely, do a Schmid Leiman transformation, and then find omega. Here is the code for Schmid Leiman. The S-L transform takes a factor or PC solution, transforms it to an oblique solution, factors the oblique solution to find a higher order (g ) factor, and then residualizes g out of the the group factors. } \usage{ schmid(model, nfactors = 3, fm = "minres",digits=2,rotate="oblimin", n.obs=NA,option="equal",Phi=NULL,covar=FALSE,two.ok=FALSE,...) } \arguments{ \item{model}{ A correlation matrix } \item{nfactors}{ Number of factors to extract } \item{fm}{the default is to do minres. fm="pa" for principal axes, fm="pc" for principal components, fm = "minres" for minimum residual (OLS), pc="ml" for maximum likelihood } \item{digits}{if digits not equal NULL, rounds to digits} \item{rotate}{The default, oblimin, produces somewhat more correlated factors than the alternative, simplimax. Other options include Promax (not Kaiser normalized) or promax (Promax with Kaiser normalization). See \code{\link{fa}} for possible oblique rotations.} \item{n.obs}{Number of observations, used to find fit statistics if specified. Will be calculated if input is raw data} \item{option}{When asking for just two group factors, option can be for "equal", "first" or "second"} \item{Phi}{If Phi is specified, then the analysis is done on a pattern matrix with the associated factor intercorrelation (Phi) matrix. This allows for reanalysess of published results} \item{covar}{Defaults to FALSE and finds correlations. If set to TRUE, then do the calculations on the unstandardized variables.} \item{two.ok}{If TRUE, do not give a warning about three factors being required.} \item{...}{Allows additional parameters to be passed to the factoring routines} } \details{Schmid Leiman orthogonalizations are typical in the ability domain, but are not seen as often in the non-cognitive personality domain. S-L is one way of finding the loadings of items on the general factor for estimating omega. A typical example would be in the study of anxiety and depression. A general neuroticism factor (g) accounts for much of the variance, but smaller group factors of tense anxiety, panic disorder, depression, etc. also need to be considerd. An alternative model is to consider hierarchical cluster analysis techniques such as \code{\link{ICLUST}}. Requires the GPArotation package. Although 3 factors are the minimum number necessary to define the solution uniquely, it is occasionally useful to allow for a two factor solution. There are three possible options for this condition: setting the general factor loadings between the two lower order factors to be "equal" which will be the sqrt(oblique correlations between the factors) or to "first" or "second" in which case the general factor is equated with either the first or second group factor. A message is issued suggesting that the model is not really well defined. A diagnostic tool for testing the appropriateness of a hierarchical model is p2 which is the percent of the common variance for each variable that is general factor variance. In general, p2 should not have much variance. An alternative approach is to use the \code{\link{directSl}} function to do a direct Schmid Leiman transformation (code adapted from Niels Waller). } \value{ \item{sl }{loadings on g + nfactors group factors, communalities, uniqueness, percent of g2 of h2} \item{orthog }{original orthogonal factor loadings} \item{oblique}{oblique factor loadings} \item{phi }{correlations among the transformed factors} \item{gload }{loadings of the lower order factors on g} ... } \references{\url{https://personality-project.org/r/r.omega.html} gives an example taken from Jensen and Weng, 1994 of a S-L transformation.} \author{ William Revelle} \seealso{ \code{\link{omega}}, \code{\link{directSl}}, \code{\link{omega.graph}}, \code{\link{fa.graph}}, \code{\link{ICLUST}},\code{\link{VSS}}} \examples{ jen <- sim.hierarchical() #create a hierarchical demo if(!require(GPArotation)) { message("I am sorry, you must have GPArotation installed to use schmid.")} else { p.jen <- schmid(jen,digits=2) #use the oblimin rotation p.jen <- schmid(jen,rotate="promax") #use the promax rotation } } \keyword{ multivariate }% at least one, from doc/KEYWORDS \keyword{ models }% __ONLY ONE__ keyword per line psych/man/dwyer.Rd0000644000176200001440000000154311546641045013605 0ustar liggesusers\name{Dwyer} \alias{Dwyer} \docType{data} \title{8 cognitive variables used by Dwyer for an example. } \description{Dwyer (1937) introduced a technique for factor extension and used 8 cognitive variables from Thurstone. This is the example data set used in his paper. } \usage{data(Dwyer)} \format{ The format is: num [1:8, 1:8] 1 0.58 -0.28 0.01 0.36 0.38 0.61 0.15 0.58 1 ... - attr(*, "dimnames")=List of 2 ..$ : chr [1:8] "V1" "V2" "V3" "V4" ... ..$ : chr [1:8] "V1" "V2" "V3" "V4" ... } \source{Data matrix retyped from the original publication. } \references{Dwyer, Paul S. (1937), The determination of the factor loadings of a given test from the known factor loadings of other tests. Psychometrika, 3, 173-178 } \examples{ data(Dwyer) Ro <- Dwyer[1:7,1:7] Roe <- Dwyer[1:7,8] fo <- fa(Ro,2,rotate="none") fa.extension(Roe,fo) } \keyword{datasets} psych/man/scoreWtd.Rd0000644000176200001440000000477314526764313014262 0ustar liggesusers\name{scoreWtd} \alias{scoreWtd} %- Also NEED an '\alias' for EACH other topic documented here. \title{Score items using regression or correlation based weights} \description{Item weights from \code{\link{bestScales}} or \code{\link{lmCor}} are used to find weighted scale scores. In contrast to the unit weights used in \code{\link{scoreItems}}, \code{\link{scoreWtd}} will multiply the data by a set of weights to find scale scores. These weight may come from a regression (e.g., \code{\link{lm}} or \code{\link{lmCor}}) or may be the zero order correlation weights from \code{\link{bestScales}}. } \usage{ scoreWtd(weights, items, std = TRUE, sums = FALSE, impute = "none") } \arguments{ \item{weights}{This is just a matrix of weights to use for each item for each scale.} \item{items}{ Matrix or dataframe of raw item scores} \item{std}{if TRUE, then find weighted standard scores else just use raw data} \item{sums}{By default, find the average item score. If sums = TRUE, then find the sum scores. This is useful for regression with an intercept term} \item{impute}{impute="median" replaces missing values with the item medians, impute = "mean" replaces values with the mean response. impute="none" the subject's scores are based upon the average of the keyed, but non missing scores. impute = "none" is probably more appropriate for a large number of missing cases (e.g., SAPA data). } } \details{Although meant for finding correlation weighted scores using the weights from \code{\link{bestScales}}, it also possible to use alternative weight matrices, such as those returned by the coefficients in \code{\link{lm}}. } \value{ A data frame of scores.} \author{William Revelle} \seealso{ \code{\link{bestScales}} and \code{\link{lmCor}} } \examples{ #find the weights from a regression model and then apply them to a new set #derivation of weights from the first 20 cases model.lm <- lm(rating ~ complaints + privileges + learning,data=attitude[1:20,]) #or use lmCor to find the coefficents model <- lmCor(rating ~ complaints + privileges +learning,data=attitude[1:20,],std=FALSE) #Apply these to a different set of data (the last 10 cases) #note that the regression coefficients need to be a matrix scores.lm <- scoreWtd(as.matrix(model.lm$coefficients),attitude[21:30,],sums=TRUE,std=FALSE) scores <- scoreWtd(model$coefficients,attitude[21:30,],sums=TRUE,std=FALSE) describe(scores) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{ multivariate } \keyword{models}psych/man/residuals.psych.Rd0000644000176200001440000000211013152330500015544 0ustar liggesusers\name{residuals.psych} \alias{residuals.psych} \alias{resid.psych} \title{Extract residuals from various psych objects} \description{Residuals in the various psych functions are extracted and then may be "pretty" printed.} \usage{ \method{residuals}{psych}(object,diag=TRUE,...) \method{resid}{psych}(object,diag=TRUE,...) } \arguments{ \item{object}{The object returned by a psych function.} \item{diag}{if FALSE, then convert the diagonal of the residuals to NA} \item{...}{Other parameters to be passed to residual (ignored but required by the generic function)} } \details{ Currently implemented for \code{\link{fa}}, \code{\link{principal}}, \code{\link{omega}}, \code{\link{irt.fa}}, and \code{\link{fa.extension}}. } \value{ residuals: a matrix of residual estimates} \author{William Revelle } \examples{ f3 <- fa(Thurstone,3) residuals(f3) sum(residuals(f3)^2) #include diagonal sum(residuals(f3,diag=FALSE)^2,na.rm=TRUE) #drop diagonal } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{ multivariate } \keyword{ models}psych/man/cor.wt.Rd0000644000176200001440000000501011767411175013664 0ustar liggesusers\name{cor.wt} \alias{cor.wt} \title{The sample size weighted correlation may be used in correlating aggregated data} \description{If using aggregated data, the correlation of the means does not reflect the sample size used for each mean. cov.wt in RCore does this and returns a covariance matrix or the correlation matrix. The cor.wt function weights by sample size or by standard errors and by default return correlations. } \usage{ cor.wt(data,vars=NULL, w=NULL,sds=NULL, cor=TRUE) } \arguments{ \item{data}{A matrix or data frame} \item{vars}{Variables to analyze} \item{w}{A set of weights (e.g., the sample sizes)} \item{sds}{Standard deviations of the samples (used if weighting by standard errors)} \item{cor}{Report correlations (the default) or covariances} } \details{A weighted correlation is just \eqn{ r_{ij} = \frac{\sum(wt_{k} (x_{ik} - x_{jk})}{\sqrt{wt_{ik} \sum(x_{ik}^2) wt_jk \sum(x_{jk}^2)}} }{\sum (wt_k * (x_ik - x_jk)) /sqrt[wt_k \sum(x^2_ik) wt_k \sum(x^2_jk)]} where \eqn{x_{ik}}{x_ik} is a deviation from the weighted mean. The weighted correlation is appropriate for correlating aggregated data, where individual data points might reflect the means of a number of observations. In this case, each point is weighted by its sample size (or alternatively, by the standard error). If the weights are all equal, the correlation is just a normal Pearson correlation. Used when finding correlations of group means found using \code{\link{statsBy}}. } \value{ \item{cor }{The weighted correlation} \item{xwt}{The data as weighted deviations from the weighted mean } \item{wt}{The weights used (calculated from the sample sizes).} \item{mean}{The weighted means} \item{xc}{Unweighted, centered deviation scores from the weighted mean} \item{xs}{Deviation scores weighted by the standard error of each sample mean} } \author{William Revelle } \note{A generalization of \code{\link{cov.wt}} in core R} \seealso{ See Also as \code{\link{cov.wt}}, \code{\link{statsBy}} } \examples{ means.by.age <- statsBy(sat.act,"age") wt.cors <- cor.wt(means.by.age) lowerMat(wt.cors$r) #show the weighted correlations unwt <- lowerCor(means.by.age$mean) mixed <- lowerUpper(unwt,wt.cors$r) #combine both results cor.plot(mixed,TRUE,main="weighted versus unweighted correlations") diff <- lowerUpper(unwt,wt.cors$r,TRUE) cor.plot(diff,TRUE,main="differences of weighted versus unweighted correlations") } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{multivariate} psych/man/00.psych-package.Rd0000644000176200001440000010675614526767126015436 0ustar liggesusers\name{00.psych} \alias{psych} \alias{psych-package} \docType{package} \title{A package for personality, psychometric, and psychological research} \description{Overview of the psych package. The psych package has been developed at Northwestern University to include functions most useful for personality and psychological research. Some of the functions (e.g., \code{\link[psychTools]{read.file}}, \code{\link[psychTools]{read.clipboard}}, \code{\link{describe}}, \code{\link{pairs.panels}}, \code{\link{error.bars}} and \code{\link{error.dots}}) are useful for basic data entry and descriptive analyses. Use help(package="psych") or objects("package:psych") for a list of all functions. Two vignettes are included as part of the package. The intro vignette tells how to install psych and overview vignette provides examples of using psych in many applications. In addition, there are a growing set of tutorials available on the \url{https://personality-project.org/r/} webpages. A companion package \code{\link[psychTools]{psychTools}} includes larger data set examples and four more vignette. Psychometric applications include routines (\code{\link{fa}} for maximum likelihood (fm="mle"), minimum residual (fm="minres"), minimum rank (fm=minrank) principal axes (fm="pa") and weighted least squares (fm="wls") factor analysis as well as functions to do Schmid Leiman transformations (\code{\link{schmid}}) to transform a hierarchical factor structure into a bifactor solution. Principal Components Analysis (\code{\link{pca}}) is also available. Rotations may be done using factor or components transformations to a target matrix include the standard Promax transformation (\code{\link{Promax}}), a transformation to a cluster target, or to any simple target matrix (\code{\link{target.rot}}) as well as the ability to call many of the GPArotation functions (e.g., oblimin, quartimin, varimax, geomin, ...). Functions for determining the number of factors in a data matrix include Very Simple Structure (\code{\link{VSS}}) and Minimum Average Partial correlation (\code{\link{MAP}}). An alternative approach to factor analysis is Item Cluster Analysis (\code{\link{ICLUST}}). This function is particularly appropriate for exploratory scale construction. There are a number of functions for finding various reliability coefficients (see Revelle and Condon, 2019). These include the traditional \code{\link{alpha}} (found for multiple scales and with more useful output by \code{\link{scoreItems}}, \code{\link{score.multiple.choice}}), beta (\code{\link{ICLUST}}) and both of McDonald's omega coefficients (\code{\link{omega}}, \code{\link{omegaSem}} and \code{\link{omega.diagram}}) as well as Guttman's six estimates of internal consistency reliability (\code{\link{guttman}}) and the six measures of Intraclass correlation coefficients (\code{\link{ICC}}) discussed by Shrout and Fleiss are also available. Multilevel analyses may be done by \code{\link{statsBy}} and \code{\link{multilevel.reliability}}. The \code{\link{scoreItems}}, and \code{\link{score.multiple.choice}} functions may be used to form single or multiple scales from sets of dichotomous, multilevel, or multiple choice items by specifying scoring keys. \code{\link{scoreOverlap}} correct interscale correlations for overlapping items, so that it is possible to examine hierarchical or nested structures. Scales can be formed that best predict (after cross validation) particular criteria using \code{\link{bestScales}} using unit weighted or correlation weights. This procedure, also called the \code{\link{BISCUIT}} algorithm (Best Items Scales that are Cross validated, Unit weighted, Informative, and Transparent) is a simple alternative to more complicated supervised machine learning algorithms. Additional functions make for more convenient descriptions of item characteristics include 1 and 2 parameter Item Response measures. The \code{\link{tetrachoric}}, \code{\link{polychoric}} and \code{\link{irt.fa}} functions are used to find 2 parameter descriptions of item functioning. \code{\link{scoreIrt}}, \code{\link{scoreIrt.1pl}} and \code{\link{scoreIrt.2pl}} do basic IRT based scoring. A number of procedures have been developed as part of the Synthetic Aperture Personality Assessment (SAPA \url{https://www.sapa-project.org/}) project. These routines facilitate forming and analyzing composite scales equivalent to using the raw data but doing so by adding within and between cluster/scale item correlations. These functions include extracting clusters from factor loading matrices (\code{\link{factor2cluster}}), synthetically forming clusters from correlation matrices (\code{\link{cluster.cor}}), and finding multiple ((\code{\link{lmCor}}) and partial ((\code{\link{partial.r}}) correlations from correlation matrices. If forming empirical scales, or testing out multiple regressions, it is important to cross validate the results. \code{\link{crossValidation}} will do this on a different data set. \code{\link{lmCor}} and \code{\link{mediate}} meet the desire to do regressions and mediation analysis from either raw data or from correlation matrices. If raw data are provided, these functions can also do moderation analyses. Functions to generate simulated data with particular structures include \code{\link{sim.circ}} (for circumplex structures), \code{\link{sim.item}} (for general structures) and \code{\link{sim.congeneric}} (for a specific demonstration of congeneric measurement). The functions \code{\link{sim.congeneric}} and \code{\link{sim.hierarchical}} can be used to create data sets with particular structural properties. A more general form for all of these is \code{\link{sim.structural}} for generating general structural models. These are discussed in more detail in the vignette (psych_for_sem). Functions to apply various standard statistical tests include \code{\link{p.rep}} and its variants for testing the probability of replication, \code{\link{r.con}} for the confidence intervals of a correlation, and \code{\link{r.test}} to test single, paired, or sets of correlations. In order to study diurnal or circadian variations in mood, it is helpful to use circular statistics. Functions to find the circular mean (\code{\link{circadian.mean}}), circular (phasic) correlations (\code{\link{circadian.cor}}) and the correlation between linear variables and circular variables (\code{\link{circadian.linear.cor}}) supplement a function to find the best fitting phase angle (\code{\link{cosinor}}) for measures taken with a fixed period (e.g., 24 hours). A dynamic model of personality and motivation (the Cues-Tendency-Actions model) is include as \code{\link{cta}}. A number of useful helper functions allow for data input (\code{\link[psychTools]{read.file}}), and data manipulation \code{\link{cs}} and \code{\link[psychTools]{dfOrder}}, The most recent development version of the package is always available for download as a \emph{source} file from the repository at the PMC lab: install.packages("psych", repos = "https://personality-project.org/r/", type="source"). This will provide the most recent version for PCs and Macs. } \details{Two vignettes (intro.pdf and scoring.pdf) are useful introductions to the package. They may be found as vignettes in R or may be downloaded from \url{https://personality-project.org/r/psych/intro.pdf} \url{https://personality-project.org/r/psych/overview.pdf} and \url{https://personality-project.org/r/psych/psych_for_sem.pdf}. In addition, there are a number of "HowTo"s available at \url{https://personality-project.org/r/} The more important functions in the package are for the analysis of multivariate data, with an emphasis upon those functions useful in scale construction of item composites. However, there are a number of very useful functions for basic data manipulation including \code{\link[psychTools]{read.file}}, \code{\link[psychTools]{read.clipboard}}, \code{\link{describe}}, \code{\link{pairs.panels}}, \code{\link{error.bars}} and \code{\link{error.dots}}) which are useful for basic data entry and descriptive analyses. When given a set of items from a personality inventory, one goal is to combine these into higher level item composites. This leads to several questions: 1) What are the basic properties of the data? \code{\link{describe}} reports basic summary statistics (mean, sd, median, mad, range, minimum, maximum, skew, kurtosis, standard error) for vectors, columns of matrices, or data.frames. \code{\link{describeBy}} provides descriptive statistics, organized by one or more grouping variables. \code{\link{statsBy}} provides even more detail for data structured by groups including within and between correlation matrices, ICCs for group differences, as well as basic descriptive statistics organized by group. \code{\link{pairs.panels}} shows scatter plot matrices (SPLOMs) as well as histograms and the Pearson correlation for scales or items. \code{\link{error.bars}} will plot variable means with associated confidence intervals. \code{\link{errorCircles}} will plot confidence intervals for both the x and y coordinates. \code{\link{corr.test}} will find the significance values for a matrix of correlations. \code{\link{error.dots}} creates a dot chart with confidence intervals. 2) What is the most appropriate number of item composites to form? After finding either standard Pearson correlations, or finding tetrachoric or polychoric correlations, the dimensionality of the correlation matrix may be examined. The number of factors/components problem is a standard question of factor analysis, cluster analysis, or principal components analysis. Unfortunately, there is no agreed upon answer. The Very Simple Structure (\code{\link{VSS}}) set of procedures has been proposed as on answer to the question of the optimal number of factors. Other procedures (\code{\link{VSS.scree}}, \code{\link{VSS.parallel}}, \code{\link{fa.parallel}}, and \code{\link{MAP}}) also address this question. \code{\link{nfactors}} combine several of these approaches into one convenient function. Unfortunately, there is no best answer to the problem. 3) What are the best composites to form? Although this may be answered using principal components (\code{\link{principal}}, aka \code{\link{pca}}), principal axis (\code{\link{factor.pa}}) or minimum residual (\code{\link{factor.minres}}) factor analysis (all part of the \code{\link{fa}} function) and to show the results graphically (\code{\link{fa.diagram})}, it is sometimes more useful to address this question using cluster analytic techniques. Previous versions of \code{\link{ICLUST}} (e.g., Revelle, 1979) have been shown to be particularly successful at forming maximally consistent and independent item composites. Graphical output from \code{\link{ICLUST.graph}} uses the Graphviz dot language and allows one to write files suitable for Graphviz. If Rgraphviz is available, these graphs can be done in R. Graphical organizations of cluster and factor analysis output can be done using \code{\link{cluster.plot}} which plots items by cluster/factor loadings and assigns items to that dimension with the highest loading. 4) How well does a particular item composite reflect a single construct? This is a question of reliability and general factor saturation. Multiple solutions for this problem result in (Cronbach's) alpha (\code{\link{alpha}}, \code{\link{scoreItems}}), (Revelle's) Beta (\code{\link{ICLUST}}), and (McDonald's) \code{\link{omega}} (both omega hierarchical and omega total). Additional reliability estimates may be found in the \code{\link{guttman}} function. This can also be examined by applying \code{\link{irt.fa}} Item Response Theory techniques using factor analysis of the \code{\link{tetrachoric}} or \code{\link{polychoric}} correlation matrices and converting the results into the standard two parameter parameterization of item difficulty and item discrimination. Information functions for the items suggest where they are most effective. 5) For some applications, data matrices are synthetically combined from sampling different items for different people. So called Synthetic Aperture Personality Assessement (SAPA) techniques allow the formation of large correlation or covariance matrices even though no one person has taken all of the items. To analyze such data sets, it is easy to form item composites based upon the covariance matrix of the items, rather than original data set. These matrices may then be analyzed using a number of functions (e.g., \code{\link{cluster.cor}}, \code{\link{fa}}, \code{\link{ICLUST}}, \code{\link{pca}}, \code{\link{mat.regress}}, and \code{\link{factor2cluster}}. 6) More typically, one has a raw data set to analyze. \code{\link{alpha}} will report several reliablity estimates as well as item-whole correlations for items forming a single scale, \code{\link{score.items}} will score data sets on multiple scales, reporting the scale scores, item-scale and scale-scale correlations, as well as coefficient alpha, alpha-1 and G6+. Using a `keys' matrix (created by \code{\link{make.keys}} or by hand), scales can have overlapping or independent items. \code{\link{score.multiple.choice}} scores multiple choice items or converts multiple choice items to dichtomous (0/1) format for other functions. If the scales have overlapping items, then \code{\link{scoreOverlap}} will give similar statistics, but correcting for the item overlap. 7) The \code{\link{reliability}} function combines the output from several different ways to estimate reliability including \code{\link{omega}} and \code{\link{splitHalf}}. 8) In addition to classical test theory (CTT) based scores of either totals or averages, 1 and 2 parameter IRT based scores may be found with \code{\link{scoreIrt.1pl}}, \code{\link{scoreIrt.2pl}} or more generally \code{\link{scoreIrt}}. Although highly correlated with CTT estimates, these scores take advantage of different item difficulties and are particularly appropriate for the problem of missing data. 9) If the data has a multilevel structure (e.g, items nested within time nested within subjects) the \code{\link{multilevel.reliability}} aka \code{\link{mlr}} function will estimate generalizability coefficients for data over subjects, subjects over time, etc. \code{\link{mlPlot}} will provide plots for each subject of items over time. \code{\link{mlArrange}} takes the conventional wide output format and converts it to the long format necessary for some multilevel functions. Other functions useful for multilevel data include \code{\link{statsBy}} and \code{\link{faBy}}. An additional set of functions generate simulated data to meet certain structural properties. \code{\link{sim.anova}} produces data simulating a 3 way analysis of variance (ANOVA) or linear model with or with out repeated measures. \code{\link{sim.item}} creates simple structure data, \code{\link{sim.circ}} will produce circumplex structured data, \code{\link{sim.dichot}} produces circumplex or simple structured data for dichotomous items. These item structures are useful for understanding the effects of skew, differential item endorsement on factor and cluster analytic soutions. \code{\link{sim.structural}} will produce correlation matrices and data matrices to match general structural models. (See the vignette). When examining personality items, some people like to discuss them as representing items in a two dimensional space with a circumplex structure. Tests of circumplex fit \code{\link{circ.tests}} have been developed. When representing items in a circumplex, it is convenient to view them in \code{\link{polar}} coordinates. Additional functions for testing the difference between two independent or dependent correlation \code{\link{r.test}}, to find the \code{\link{phi}} or \code{\link{Yule}} coefficients from a two by table, or to find the confidence interval of a correlation coefficient. Many data sets are included: \code{\link[psychTools]{bfi}} represents 25 personality items thought to represent five factors of personality, \code{\link[psychTools]{ability}} has 14 multiple choice iq items. \code{\link{sat.act}} has data on self reported test scores by age and gender. \code{\link[psychTools]{galton} } Galton's data set of the heights of parents and their children. \code{\link[psychTools]{peas}} recreates the original Galton data set of the genetics of sweet peas. \code{\link[psychTools]{heights}} and \code{\link[psychTools]{cubits}} provide even more Galton data, \code{\link[psychTools]{vegetables}} provides the Guilford preference matrix of vegetables. \code{\link[psychTools]{cities}} provides airline miles between 11 US cities (demo data for multidimensional scaling). Partial Index (to see the entire index, see the link at the bottom of every help page) \link{psych} A package for personality, psychometric, and psychological research.\cr Useful data entry and descriptive statistics\cr \tabular{ll}{ \link[psychTools]{read.file} \tab search for, find, and read from file\cr \link[psychTools]{read.clipboard} \tab shortcut for reading from the clipboard\cr \link[psychTools]{read.clipboard.csv} \tab shortcut for reading comma delimited files from clipboard \cr \link[psychTools]{read.clipboard.lower} \tab shortcut for reading lower triangular matrices from the clipboard\cr \link[psychTools]{read.clipboard.upper} \tab shortcut for reading upper triangular matrices from the clipboard\cr \link{describe} \tab Basic descriptive statistics useful for psychometrics\cr \link{describe.by} \tab Find summary statistics by groups\cr \link{statsBy} \tab Find summary statistics by a grouping variable, including within and between correlation matrices. \cr \link{mlArrange} \tab Change multilevel data from wide to long format\cr \link{headtail} \tab combines the head and tail functions for showing data sets\cr \link{pairs.panels} \tab SPLOM and correlations for a data matrix\cr \link{corr.test} \tab Correlations, sample sizes, and p values for a data matrix\cr \link{cor.plot} \tab graphically show the size of correlations in a correlation matrix\cr \link{multi.hist} \tab Histograms and densities of multiple variables arranged in matrix form\cr \link{skew} \tab Calculate skew for a vector, each column of a matrix, or data.frame\cr \link{kurtosi} \tab Calculate kurtosis for a vector, each column of a matrix or dataframe\cr \link{geometric.mean} \tab Find the geometric mean of a vector or columns of a data.frame \cr \link{harmonic.mean} \tab Find the harmonic mean of a vector or columns of a data.frame \cr \link{error.bars} \tab Plot means and error bars \cr \link{error.bars.by} \tab Plot means and error bars for separate groups\cr \link{error.crosses} \tab Two way error bars \cr \link{interp.median} \tab Find the interpolated median, quartiles, or general quantiles. \cr \link{rescale} \tab Rescale data to specified mean and standard deviation \cr \link{table2df} \tab Convert a two dimensional table of counts to a matrix or data frame \cr } Data reduction through cluster and factor analysis\cr \tabular{ll}{ \link{fa} \tab Combined function for principal axis, minimum residual, weighted least squares, \cr \tab and maximum likelihood factor analysis\cr \link{factor.pa} \tab Do a principal Axis factor analysis (deprecated)\cr \link{factor.minres} \tab Do a minimum residual factor analysis (deprecated)\cr \link{factor.wls} \tab Do a weighted least squares factor analysis (deprecated)\cr \link{fa.graph} \tab Show the results of a factor analysis or principal components analysis graphically\cr \link{fa.diagram} \tab Show the results of a factor analysis without using Rgraphviz \cr \link{fa.sort} \tab Sort a factor or principal components output \cr \link{fa.extension} \tab Apply the Dwyer extension for factor loadingss \cr \link{principal} \tab Do an eigen value decomposition to find the principal components of a matrix\cr \link{fa.parallel} \tab Scree test and Parallel analysis \cr \link{fa.parallel.poly} \tab Scree test and Parallel analysis for polychoric matrices \cr \link{factor.scores} \tab Estimate factor scores given a data matrix and factor loadings \cr \link{guttman} \tab 8 different measures of reliability (6 from Guttman (1945) \cr \code{\link{irt.fa}} \tab Apply factor analysis to dichotomous items to get IRT parameters \cr \code{\link{iclust}} \tab Apply the ICLUST algorithm\cr \link{ICLUST.diagram} \tab The base R graphics output function called by \code{\link{iclust}} \cr \link{ICLUST.graph} \tab Graph the output from ICLUST using the dot language\cr \link{ICLUST.rgraph} \tab Graph the output from ICLUST using rgraphviz \cr \link{kaiser} \tab Apply kaiser normalization before rotating \cr \link{reliability} \tab A wrapper function to find alpha, omega, split half. etc. \cr \link{polychoric} \tab Find the polychoric correlations for items and find item thresholds\cr \link{poly.mat} \tab Find the polychoric correlations for items (uses J. Fox's hetcor) \cr \link{omega} \tab Calculate the omega estimate of factor saturation (requires the GPArotation package)\cr \link{omega.graph} \tab Draw a hierarchical or Schmid Leiman orthogonalized solution (uses Rgraphviz) \cr \link{partial.r} \tab Partial variables from a correlation matrix \cr \link{predict} \tab Predict factor/component scores for new data \cr \link{schmid} \tab Apply the Schmid Leiman transformation to a correlation matrix\cr \link{scoreItems} \tab Combine items into multiple scales and find alpha\cr \link{score.multiple.choice} \tab Combine items into multiple scales and find alpha and basic scale statistics\cr \link{scoreOverlap} \tab Find item and scale statistics (similar to \link{score.items}) but correct for item overlap \cr \link{lmCor} \tab Find Cohen's set correlation between two sets of variables (see also \link{lmCor} for the latest version)\cr \link{smc} \tab Find the Squared Multiple Correlation (used for initial communality estimates)\cr \link{tetrachoric} \tab Find tetrachoric correlations and item thresholds \cr \link{polyserial} \tab Find polyserial and biserial correlations for item validity studies \cr \link{mixed.cor} \tab Form a correlation matrix from continuous, polytomous, and dichotomous items \cr \link{VSS} \tab Apply the Very Simple Structure criterion to determine the appropriate number of factors.\cr \link{VSS.parallel} \tab Do a parallel analysis to determine the number of factors for a random matrix\cr \link{VSS.plot} \tab Plot VSS output\cr \link{VSS.scree} \tab Show the scree plot of the factor/principal components\cr \link{MAP} \tab Apply the Velicer Minimum Absolute Partial criterion for number of factors \cr } Functions for reliability analysis (some are listed above as well). \tabular{ll}{ \link{alpha} \tab Find coefficient alpha and Guttman Lambda 6 for a scale (see also \link{score.items})\cr \link{guttman} \tab 8 different measures of reliability (6 from Guttman (1945) \cr \link{omega} \tab Calculate the omega estimates of reliability (requires the GPArotation package)\cr \link{omegaSem} \tab Calculate the omega estimates of reliability using a Confirmatory model (requires the sem package)\cr \link{ICC} \tab Intraclass correlation coefficients \cr \link{score.items} \tab Combine items into multiple scales and find alpha\cr \link{glb.algebraic} \tab The greates lower bound found by an algebraic solution (requires Rcsdp). Written by Andreas Moeltner \cr } Procedures particularly useful for Synthetic Aperture Personality Assessment\cr \tabular{ll}{ \link{alpha} \tab Find coefficient alpha and Guttman Lambda 6 for a scale (see also \link{score.items})\cr \link{bestScales} \tab A bootstrap aggregation function for choosing most predictive unit weighted items \cr \link{make.keys} \tab Create the keys file for score.items or cluster.cor \cr \link{correct.cor} \tab Correct a correlation matrix for unreliability\cr \link{count.pairwise} \tab Count the number of complete cases when doing pair wise correlations\cr \link{cluster.cor} \tab find correlations of composite variables from larger matrix\cr \link{cluster.loadings} \tab find correlations of items with composite variables from a larger matrix\cr \link{eigen.loadings} \tab Find the loadings when doing an eigen value decomposition\cr \link{fa} \tab Do a minimal residual or principal axis factor analysis and estimate factor scores\cr \link{fa.extension} \tab Extend a factor analysis to a set of new variables\cr \link{factor.pa} \tab Do a Principal Axis factor analysis and estimate factor scores\cr \link{factor2cluster} \tab extract cluster definitions from factor loadings\cr \link{factor.congruence} \tab Factor congruence coefficient\cr \link{factor.fit} \tab How well does a factor model fit a correlation matrix\cr \link{factor.model} \tab Reproduce a correlation matrix based upon the factor model\cr \link{factor.residuals} \tab Fit = data - model\cr \link{factor.rotate} \tab ``hand rotate" factors\cr \link{guttman} \tab 8 different measures of reliability\cr \link{lmCor} \tab standardized multiple regression from raw or correlation matrix input Formerly called \link{lmCor}\cr \link{mat.regress} \tab standardized multiple regression from raw or correlation matrix input\cr \link{polyserial} \tab polyserial and biserial correlations with massive missing data\cr \link{tetrachoric} \tab Find tetrachoric correlations and item thresholds \cr } Functions for generating simulated data sets \cr \tabular{ll}{ \link{sim} \tab The basic simulation functions \cr \link{sim.anova} \tab Generate 3 independent variables and 1 or more dependent variables for demonstrating ANOVA \cr \tab and lm designs \cr \link{sim.circ} \tab Generate a two dimensional circumplex item structure \cr \link{sim.item} \tab Generate a two dimensional simple structure with particular item characteristics \cr \link{sim.congeneric} \tab Generate a one factor congeneric reliability structure \cr \link{sim.minor} \tab Simulate nfact major and nvar/2 minor factors \cr \link{sim.structural} \tab Generate a multifactorial structural model \cr \link{sim.irt} \tab Generate data for a 1, 2, 3 or 4 parameter logistic model\cr \link{sim.VSS} \tab Generate simulated data for the factor model\cr \link{phi.demo} \tab Create artificial data matrices for teaching purposes\cr \link{sim.hierarchical} \tab Generate simulated correlation matrices with hierarchical or any structure\cr \link{sim.spherical} \tab Generate three dimensional spherical data (generalization of circumplex to 3 space)\cr } Graphical functions (require Rgraphviz) -- deprecated \cr \tabular{ll}{ \link{structure.graph} \tab Draw a sem or regression graph \cr \link{fa.graph} \tab Draw the factor structure from a factor or principal components analysis \cr \link{omega.graph} \tab Draw the factor structure from an omega analysis(either with or without the Schmid Leiman transformation) \cr \link{ICLUST.graph} \tab Draw the tree diagram from ICLUST \cr } Graphical functions that do not require Rgraphviz \cr \tabular{ll}{ \link{diagram} \tab A general set of diagram functions. \cr \link{structure.diagram} \tab Draw a sem or regression graph \cr \link{fa.diagram} \tab Draw the factor structure from a factor or principal components analysis \cr \link{omega.diagram} \tab Draw the factor structure from an omega analysis(either with or without the Schmid Leiman transformation) \cr \link{ICLUST.diagram} \tab Draw the tree diagram from ICLUST \cr \link{plot.psych} \tab A call to plot various types of output (e.g. from irt.fa, fa, omega, iclust \cr \link{cor.plot} \tab A heat map display of correlations \cr \link{scatterHist} \tab Bivariate scatter plot and histograms \cr \link{spider} \tab Spider and radar plots (circular displays of correlations) } Circular statistics (for circadian data analysis) \cr \tabular{ll}{ \link{circadian.cor} \tab Find the correlation with e.g., mood and time of day \cr \link{circadian.linear.cor} \tab Correlate a circular value with a linear value \cr \link{circadian.mean} \tab Find the circular mean of each column of a a data set \cr \link{cosinor} \tab Find the best fitting phase angle for a circular data set \cr } Miscellaneous functions\cr \cr \tabular{ll}{ \link{comorbidity} \tab Convert base rate and comorbity to phi, Yule and tetrachoric\cr \link[psychTools]{df2latex} \tab Convert a data.frame or matrix to a LaTeX table \cr \link{dummy.code} \tab Convert categorical data to dummy codes \cr \link{fisherz} \tab Apply the Fisher r to z transform\cr \link{fisherz2r} \tab Apply the Fisher z to r transform\cr \link{ICC} \tab Intraclass correlation coefficients \cr \link{cortest.mat} \tab Test for equality of two matrices (see also cortest.normal, cortest.jennrich ) \cr \link{cortest.bartlett} \tab Test whether a matrix is an identity matrix \cr \link{paired.r} \tab Test for the difference of two paired or two independent correlations\cr \link{r.con} \tab Confidence intervals for correlation coefficients \cr \link{r.test} \tab Test of significance of r, differences between rs. \cr \link{p.rep} \tab The probability of replication given a p, r, t, or F \cr \link{phi} \tab Find the phi coefficient of correlation from a 2 x 2 table \cr \link{phi.demo} \tab Demonstrate the problem of phi coefficients with varying cut points \cr \link{phi2poly} \tab Given a phi coefficient, what is the polychoric correlation\cr \link{phi2poly.matrix} \tab Given a phi coefficient, what is the polychoric correlation (works on matrices)\cr \link{polar} \tab Convert 2 dimensional factor loadings to polar coordinates.\cr \link{scaling.fits} \tab Compares alternative scaling solutions and gives goodness of fits \cr \link{scrub} \tab Basic data cleaning \cr \link{tetrachor} \tab Finds tetrachoric correlations \cr \link{thurstone} \tab Thurstone Case V scaling \cr \link{tr} \tab Find the trace of a square matrix \cr \link{wkappa} \tab weighted and unweighted versions of Cohen's kappa \cr \link{Yule} \tab Find the Yule Q coefficient of correlation \cr \link{Yule.inv} \tab What is the two by two table that produces a Yule Q with set marginals? \cr \link{Yule2phi} \tab What is the phi coefficient corresponding to a Yule Q with set marginals? \cr \link{Yule2tetra} \tab Convert one or a matrix of Yule coefficients to tetrachoric coefficients. \cr } Functions that are under development and not recommended for casual use \cr \tabular{ll}{ \link{irt.item.diff.rasch} \tab IRT estimate of item difficulty with assumption that theta = 0\cr \link{irt.person.rasch} \tab Item Response Theory estimates of theta (ability) using a Rasch like model\cr\cr } Data sets included in the psych or psychTools package \cr \tabular{ll}{ \link[psychTools]{bfi} \tab represents 25 personality items thought to represent five factors of personality \cr \link{Thurstone} \tab 8 different data sets with a bifactor structure \cr \link[psychTools]{cities} \tab The airline distances between 11 cities (used to demonstrate MDS) \cr \link[psychTools]{epi.bfi} \tab 13 personality scales \cr \link[psychTools]{iqitems} \tab 14 multiple choice iq items \cr \link[psychTools]{msq} \tab 75 mood items \cr \link{sat.act} \tab Self reported ACT and SAT Verbal and Quantitative scores by age and gender\cr \link{Tucker} \tab Correlation matrix from Tucker \cr \link[psychTools]{galton} \tab Galton's data set of the heights of parents and their children \cr \link[psychTools]{heights} \tab Galton's data set of the relationship between height and forearm (cubit) length \cr \link[psychTools]{cubits} \tab Galton's data table of height and forearm length \cr \link[psychTools]{peas} \tab Galton`s data set of the diameters of 700 parent and offspring sweet peas \cr \link[psychTools]{vegetables} \tab Guilford`s preference matrix of vegetables (used for thurstone) \cr } A debugging function that may also be used as a demonstration of psych. \tabular{ll}{ \link{test.psych} \tab Run a test of the major functions on 5 different data sets. Primarily for development purposes.\cr \tab Although the output can be used as a demo of the various functions. } } \note{Development versions (source code) of this package are maintained at the repository \url{https://personality-project.org/r/} along with further documentation. Specify that you are downloading a source package. \cr Some functions require other packages. Specifically, omega and schmid require the GPArotation package, ICLUST.rgraph and fa.graph require Rgraphviz but have alternatives using the diagram functions. i.e.: \cr \tabular{ll}{ function \tab requires\cr \link{omega} \tab GPArotation \cr \link{schmid} \tab GPArotation\cr \link{ICLUST.rgraph} \tab Rgraphviz \cr \link{fa.graph} \tab Rgraphviz \cr \link{structure.graph} \tab Rgraphviz \cr \link{glb.algebraic} \tab Rcsdp \cr } } \author{William Revelle} \references{A general guide to personality theory and research may be found at the personality-project \url{https://personality-project.org/}. See also the short guide to R at \url{https://personality-project.org/r/}. In addition, see Revelle, W. (in preparation) An Introduction to Psychometric Theory with applications in R. Springer. at \url{https://personality-project.org/r/book/} Revelle, W. and Condon, D.M. (2019) Reliability from alpha to omega: A tutorial. Psychological Assessment, 31, 12, 1395-1411. https://doi.org/10.1037/pas0000754. \url{https://osf.io/preprints/psyarxiv/2y3w9/} Preprint available from PsyArxiv } \keyword{package}% __ONLY ONE__ keyword per line \keyword{multivariate}% at least one, from doc/KEYWORDS \keyword{models}% __ONLY ONE__ keyword per line \keyword{cluster}% __ONLY ONE__ keyword per line \examples{ #See the separate man pages and the complete index. #to test most of the psych package run the following #test.psych() } psych/man/count.pairwise.Rd0000644000176200001440000001534114164100313015411 0ustar liggesusers\name{pairwiseCount} \alias{pairwiseCount} \alias{pairwiseCountBig} \alias{count.pairwise} \alias{pairwiseDescribe} \alias{pairwiseZero} \alias{pairwiseSample} \alias{pairwiseReport} \alias{pairwiseImpute} \alias{pairwisePlot} \title{Count number of pairwise cases for a data set with missing (NA) data and impute values. } \description{ When doing cor(x, use= "pairwise"), it is nice to know the number of cases for each pairwise correlation. This is particularly useful when doing SAPA type analyses. More importantly, when there are some missing pairs, it is useful to supply imputed values so that further analyses may be done. This is useful if using the Massively Missing Completely at Random (MMCAR) designs used by the SAPA project. The specific pairs missing may be identified by pairwiseZero. Summaries of the counts are given by pairwiseDescribe. } \usage{ pairwiseCount(x, y = NULL,diagonal=TRUE) pairwiseDescribe(x,y,diagonal=FALSE,...) pairwiseZero(x,y=NULL, min=0, short=TRUE) pairwiseImpute(keys,R,fix=FALSE) pairwiseReport(x,y=NULL,cut=0,diagonal=FALSE,...) pairwiseSample(x,y=NULL,diagonal=FALSE,size=100,...) pairwiseCountBig(x,size=NULL) pairwisePlot(x,y=NULL,upper=TRUE,diagonal=TRUE,labels=TRUE,show.legend=TRUE,n.legend=10, colors=FALSE,gr=NULL,minlength=6,xlas=1,ylas=2, main="Relative Frequencies",count=TRUE,...) count.pairwise(x, y = NULL,diagonal=TRUE) #deprecated } \arguments{ \item{x}{ An input matrix, typically a data matrix ready to be correlated. } \item{y}{ An optional second input matrix } \item{diagonal}{if TRUE, then report the diagonal, else fill the diagonals with NA} \item{...}{Other parameters to pass to describe} \item{min}{Count the number of item pairs with <= min entries} \item{short}{Show the table of the item pairs that have entries <= min} \item{keys}{A keys.list specifying which items belong to which scale.} \item{R}{A correlation matrix to be described or imputed} \item{cut}{Report the item pairs and numbers with cell sizes less than cut} \item{fix}{If TRUE, then replace all NA correlations with the mean correlation for that within or between scale} \item{upper}{Should the upper off diagonal matrix be drawn, or left blank?} \item{labels}{if NULL, use column and row names, otherwise use labels} \item{show.legend}{A legend (key) to the colors is shown on the right hand side} \item{n.legend}{How many categories should be labelled in the legend?} \item{colors}{Defaults to FALSE and will use a grey scale. colors=TRUE use colors \ from the colorRampPalette from red through white to blue} \item{minlength}{If not NULL, then the maximum number of characters to use in row/column labels} \item{xlas}{Orientation of the x axis labels (1 = horizontal, 0, parallel to axis, 2 perpendicular to axis)} \item{ylas}{Orientation of the y axis labels (1 = horizontal, 0, parallel to axis, 2 perpendicular to axis)} \item{main}{A title. Defaults to "Relative Frequencies"} \item{gr}{A color gradient: e.g., gr <- colorRampPalette(c("#B52127", "white", "#2171B5")) will produce slightly more pleasing (to some) colors. See next to last example of \code{\link{corPlot}}. } \item{count}{Should we count the number of pairwise observations using pairwiseCount, or just plot the counts for a matrix?} \item{size}{Sample size of the number of variables to sample in pairwiseSample} } \details{When using Massively Missing Completely at Random (MMCAR) designs used by the SAPA project, it is important to count the number of pairwise observations (\code{\link{pairwiseCount}}). If there are pairs with 1 or fewer observations, these will produce NA values for correlations making subsequent factor analyses \code{\link{fa}} or reliability analsyes \code{\link{omega}} or \code{\link{scoreOverlap}} impossible. \code{\link{pairwiseCountBig}} may be used to count the cells of large data sets. It is analogous to \code{\link{bigCor}} and returns the cell sizes for each pair of correlations. In order to identify item pairs with counts less than a certain value \code{\link{pairwiseReport}} reports the names of those pairs with fewer than 'cut' observations. By default, it just reports the number of offending items. With short=FALSE, the print will give the items with n.obs < cut. Even more detail is available in the returned objects. The specific pairs that have values <= n min in any particular table of the paiwise counts may be given by \code{\link{pairwiseZero}}. To remedy the problem of missing correlations, we impute the missing correlations using \code{\link{pairwiseImpute}}. The technique takes advantage of the scale based structure of SAPA items. Items within a scale (e.g. Letter Number Series similar to the \code{\link[psychTools]{ability}} items) are imputed to correlate with items from another scale (e.g., Matrix Reasoning) at the average of these two between scale inter-item mean correlations. The average correlations within and between scales are reported by \code{\link{pairwiseImpute}} and if the fix paremeter is specified, the imputed correlation matrix is returned. Alternative methods of imputing these correlations are not yet implemented. The time to count cell size varies linearly by the number of subjects and of the number of items squared. This becomes prohibitive for larger (big n items) data sets. \code{\link{pairwiseSample}} will take samples of size=size of these bigger data sets and then returns basic descriptive statistics of these counts, including mean, median, and the .05, .25, .5, .75 and .95 quantiles. } \value{ \item{result}{ = matrix of counts of pairwise observations (if pairwiseCount)} \item{av.r}{The average correlation value of the observed correlations within/between scales} \item{count}{The numer of observed correlations within/between each scale} \item{percent}{The percentage of complete data by scale} \item{imputed}{The original correlation matrix with NA values replaced by the mean correlation for items within/between the appropriate scale.} } \author{ Maintainer: William Revelle \email{revelle@northwestern.edu}} \examples{ x <- matrix(rnorm(900),ncol=6) y <- matrix(rnorm(450),ncol=3) x[x < 0] <- NA y[y > 1] <- NA pairwiseCount(x) pairwiseCount(y) pairwiseCount(x,y) pairwiseCount(x,diagonal=FALSE) pairwiseDescribe(x,quant=c(.1,.25,.5,.75,.9)) #examine the structure of the ability data set keys <- list(ICAR16=colnames(psychTools::ability),reasoning = cs(reason.4,reason.16,reason.17,reason.19), letters=cs(letter.7, letter.33,letter.34,letter.58, letter.7), matrix=cs(matrix.45,matrix.46,matrix.47,matrix.55), rotate=cs(rotate.3,rotate.4,rotate.6,rotate.8)) pairwiseImpute(keys,psychTools::ability) } \keyword{ models }% at least one, from doc/KEYWORDS \keyword{ multivariate }% __ONLY ONE__ keyword per line psych/man/corFiml.Rd0000644000176200001440000000445213463346202014045 0ustar liggesusers\name{corFiml} \alias{corFiml} \title{Find a Full Information Maximum Likelihood (FIML) correlation or covariance matrix from a data matrix with missing data } \description{Makes use of functions adapted from the lavaan package to find FIML covariance/correlation matrices. FIML can be much slower than the normal pairwise deletion option of cor, but provides slightly more precise estimates. } \usage{ corFiml(x, covar = FALSE,show=FALSE) } \arguments{ \item{x}{A data.frame or data matrix} \item{covar}{By default, just return the correlation matrix. If covar is TRUE, return a list containing the covariance matrix and the ML fit function.} \item{show}{If show=TRUE, then just show the patterns of missingness, but don't do the FIML. Useful for understanding the process of fiml.} } \details{In the presence of missing data, Full Information Maximum Likelihood (FIML) is an alternative to simply using the pairwise correlations. The implementation in the lavaan package for structural equation modeling has been adapted for the simpler case of just finding the correlations or covariances. The pairwise solution for any pair of variables is insensitive to other variables included in the matrix. On the other hand, the ML solution depends upon the entire set of items being correlated. This will lead to slightly different solutions for different subsets of variables. The basic FIML algorithm is to find the pairwise ML solution for covariances and means for every pattern of missingness and then to weight the solution by the size of every unique pattern of missingness. } \value{ \item{cor}{The correlation matrix found using FIML} \item{cov}{The covariance matrix found using FIML} \item{fx}{The ML fit function} } \author{Wiliam Revelle} \note{ The functions used in lavaan are not exported and so have been copied (and simplified) to the psych package. } \seealso{ To use the resulting correlations, see \code{\link{fa}}. To see the pairwise pattern of missingness, see \code{\link{count.pairwise}}. } \examples{ rML <- corFiml(psychTools::bfi[20:27]) rpw <- cor(psychTools::bfi[20:27],use="pairwise") round(rML - rpw,3) mp <- corFiml(psychTools::bfi[20:27],show=TRUE) mp } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{multivariate } \keyword{models} psych/man/cluster.plot.Rd0000644000176200001440000000703713736456555015132 0ustar liggesusers\name{cluster.plot} \alias{cluster.plot} \alias{fa.plot} \alias{factor.plot} \title{Plot factor/cluster loadings and assign items to clusters by their highest loading.} \description{Cluster analysis and factor analysis are procedures for grouping items in terms of a smaller number of (latent) factors or (observed) clusters. Graphical presentations of clusters typically show tree structures, although they can be represented in terms of item by cluster correlations. Cluster.plot plots items by their cluster loadings (taken, e.g., from \code{\link{ICLUST}}) or factor loadings (taken, eg., from \code{\link{fa}}). Cluster membership may be assigned apriori or may be determined in terms of the highest (absolute) cluster loading for each item. If the input is an object of class "kmeans", then the cluster centers are plotted. } \usage{ cluster.plot(ic.results, cluster = NULL, cut = 0, labels=NULL, title = "Cluster plot",pch=18,pos,show.points=TRUE,choose=NULL,...) fa.plot(ic.results, cluster = NULL, cut = 0, labels=NULL,title, jiggle=FALSE,amount=.02,pch=18,pos,show.points=TRUE,choose=NULL,main=NULL,...) factor.plot(ic.results, cluster = NULL, cut = 0, labels=NULL,title,jiggle=FALSE, amount=.02,pch=18,pos,show.points=TRUE,...) #deprecated } %- maybe also 'usage' for other objects documented here. \arguments{ \item{ic.results}{A factor analysis or cluster analysis output including the loadings, or a matrix of item by cluster correlations. Or the output from a kmeans cluster analysis. } \item{cluster}{ A vector of cluster membership } \item{cut}{ Assign items to clusters if the absolute loadings are > cut } \item{labels}{If row.names exist they will be added to the plot, or, if they don't, labels can be specified. If labels =NULL, and there are no row names, then variables are labeled by row number.)} \item{title}{ Any title} \item{jiggle}{When plotting with factor loadings that are almost identical, it is sometimes useful to "jiggle" the points by jittering them. The default is to not jiggle.} \item{amount}{if jiggle=TRUE, then how much should the points be jittered?} \item{pch}{factor and clusters are shown with different pch values, starting at pch+1} \item{pos}{Position of the text for labels for two dimensional plots. 1=below, 2 = left, 3 = above, 4= right} \item{show.points}{When adding labels to the points, should we show the points as well as the labels. For many points, better to not show them, just the labels.} \item{choose}{Specify the factor/clusters to plot} \item{main}{Any title -- redundant with title} \item{...}{Further options to plot} } \details{Results of either a factor analysis or cluster analysis are plotted. Each item is assigned to its highest loading factor, and then identified by variable name as well as cluster (by color). The cluster assignments can be specified to override the automatic clustering by loading. Both of these functions may be called directly or by calling the generic plot function. (see example). } \value{ Graphical output is presented. } \author{William Revelle} \seealso{ \code{\link{ICLUST}}, \code{\link{ICLUST.graph}}, \code{\link{fa.graph}}, \code{\link{plot.psych}}} \examples{ circ.data <- circ.sim(24,500) circ.fa <- fa(circ.data,2) plot(circ.fa,cut=.5) f5 <- fa(psychTools::bfi[1:25],5) plot(f5,labels=colnames(psychTools::bfi)[1:25],show.points=FALSE) plot(f5,labels=colnames(psychTools::bfi)[1:25],show.points=FALSE,choose=c(1,2,4)) } \keyword{multivariate } \keyword{hplot }% __ONLY ONE__ keyword per line \keyword{cluster} psych/man/mediate.Rd0000644000176200001440000003306114526764161014071 0ustar liggesusers\name{mediate} \alias{mediate} \alias{mediate.diagram} \alias{moderate.diagram} \title{Estimate and display direct and indirect effects of mediators and moderator in path models} \description{ Find the direct and indirect effects of a predictor in path models of mediation and moderation. Bootstrap confidence intervals for the indirect effects. Mediation models are just extended regression models making explicit the effect of particular covariates in the model. Moderation is done by multiplication of the predictor variables. This function supplies basic mediation/moderation analyses for some of the classic problem types. } \usage{ mediate(y, x, m=NULL, data, mod = NULL, z = NULL, n.obs = NULL, use = "pairwise", n.iter = 5000, alpha = 0.05, std = FALSE,plot=TRUE,zero=TRUE,part=FALSE, main="Mediation") mediate.diagram(medi,digits=2,ylim=c(3,7),xlim=c(-1,10),show.c=TRUE, main="Mediation model",cex=1,l.cex=1,...) moderate.diagram(medi,digits=2,ylim=c(2,8),main="Moderation model", cex=1,l.cex=1,...) } \arguments{ \item{y}{The dependent variable (or a formula suitable for a linear model), If a formula, then this is of the form y ~ x +(m) -z (see details)} \item{x}{One or more predictor variables} \item{m}{One (or more) mediating variables} \item{data}{A data frame holding the data or a correlation or covariance matrix. } \item{mod}{A moderating variable, if desired} \item{z}{Variables to partial out, if desired} \item{n.obs}{If the data are from a correlation or covariance matrix, how many observations were used. This will lead to simulated data for the bootstrap.} \item{use}{use="pairwise" is the default when finding correlations or covariances} \item{n.iter}{Number of bootstrap resamplings to conduct} \item{alpha}{Set the width of the confidence interval to be 1 - alpha} \item{std}{standardize the covariances to find the standardized betas} \item{part}{if part=TRUE, find part correlations otherwise find partial correlations when partialling} \item{plot}{Plot the resulting paths} \item{zero}{By default, will zero center the data before doing moderation} \item{digits}{The number of digits to report in the mediate.diagram.} \item{medi}{The output from mediate may be imported into mediate.diagram} \item{ylim}{The limits for the y axis in the mediate and moderate diagram functions} \item{xlim}{The limits for the x axis. Make the minimum more negative if the x by x correlations do not fit.} \item{show.c}{If FALSE, do not draw the c lines, just the partialed (c') lines} \item{main}{The title for the mediate and moderate functions} \item{cex}{Adjust the text size (defaults to 1)} \item{l.cex}{Adjust the text size in arrows, defaults to cex which in turn defaults to 1} \item{...}{Additional graphical parameters to pass to mediate.diagram} } \details{ When doing linear modeling, it is frequently convenient to estimate the direct effect of a predictor controlling for the indirect effect of a mediator. See Preacher and Hayes (2004) for a very thorough discussion of mediation. The mediate function will do some basic mediation and moderation models, with bootstrapped confidence intervals for the mediation/moderation effects. Functionally, this is just regular linear regression and partial correlation with some different output. In the case of two predictor variables, X and M, and a criterion variable Y, then the direct effect of X on Y, labeled with the path c, is said to be mediated by the effect of x on M (path a) and the effect of M on Y (path b). This partial effect (a b) is said to mediate the direct effect of X --c--> Y: X --a -> M --b--> Y with X --c'--> Y where c' = c - ab. Testing the significance of the ab mediation effect is done through bootstrapping many random resamples (with replacement) of the data. For moderation, the moderation effect of Z on the relationship between X -> Y is found by taking the (centered) product of X and Z and then adding this XZ term into the regression. By default, the data are zero centered before doing moderation (product terms). This is following the advice of Cohen, Cohen, West and Aiken (2003). However, to agree with the analyses reported in Hayes (2013) we can set the zero=FALSE option to not zero center the data. To partial out variables, either define them in the z term, or express as negative entries in the formula mode: y1 ~ x1 + x2 + (m1)+ (m2) -z will look for the effect of x1 and x2 on y, mediated through m1 and m2 after z is partialled out. Moderated mediation is done by specifying a product term. y1 ~ x1 + x2*x3 + (m1)+ (m2) -z will look for the effect of x1, x2, x3 and the product of x2 and x3 on y, mediated through m1 and m2 after z is partialled out. In the case of being provided just a correlation matrix, the bootstrapped values are based upon bootstrapping from data matching the original covariance/correlation matrix with the addition of normal errors. This allows us to test the mediation/moderation effect even if not given raw data. Moderation can not be done with just correlation matrix. The function has been tested against some of the basic cases and examples in Hayes (2013) and the associated data sets. Unless there is a temporal component that allows one to directly distinguish causal paths (time does not reverse direction), interpreting mediation models is problematic. Some people find it useful to compare the differences between mediation models where the causal paths (arrows) are reversed. This is a mistake and should not be done (Thoemmes, 2015). For fine tuning the size of the graphic output, xlim and ylim can be specified in the mediate.diagram function. Otherwise, the graphics produced by mediate and moderate use the default xlim and ylim values. Interaction terms (moderation) or mediated moderation can be specified as product terms. } \value{ \item{total}{The total direct effect of x on y (c)} \item{direct}{The beta effects of x (c') and m (b) on y } \item{indirect}{The indirect effect of x through m on y (c-ab)} \item{mean.boot}{mean bootstrapped value of indirect effect} \item{sd.boot}{Standard deviation of bootstrapped values} \item{ci.quant}{The upper and lower confidence intervals based upon the quantiles of the bootstrapped distribution.} \item{boot}{The bootstrapped values themselves.} \item{a}{The effect of x on m} \item{b}{The effect of m on y} \item{b.int}{The interaction of x and mod (if specified)} \item{data}{The original data plus the product term (if specified)} } \references{ J. Cohen, P. Cohen, S.G. West, and L.S. Aiken. (2003) Applied multiple regression/correlation analysis for the behavioral sciences. L. Erlbaum Associates, Mahwah, N.J., 3rd ed edition. Hayes, Andrew F. (2013) Introduction to mediation, moderation, and conditional process analysis: A regression-based approach. Guilford Press. Preacher, Kristopher J and Hayes, Andrew F (2004) SPSS and SAS procedures for estimating indirect effects in simple mediation models. Behavior Research Methods, Instruments, & Computers 36, (4) 717-731. Thoemmes, Felix (2015) Reversing arrows in mediation models does not distinguish plausible models. Basic and applied social psychology, 27: 226-234. Data from Hayes (2013), Preacher and Hayes (2004), and from Kerchoff (1974). The Tal_Or data set is from Nurit Tal-Or and Jonathan Cohen and Yariv Tsfati and Albert C. Gunther, (2010) ``Testing Causal Direction in the Influence of Presumed Media Influence", Communication Research, 37, 801-824 and is used with their kind permission. It is adapted from the webpage of A.F. Hayes. (www.afhayes.com/public/hayes2013data.zip). The Garcia data set is from Garcia, Donna M. and Schmitt, Michael T. and Branscombe, Nyla R. and Ellemers, Naomi (2010). Women's reactions to ingroup members who protest discriminatory treatment: The importance of beliefs about inequality and response appropriateness. European Journal of Social Psychology, (40) 733-745 and is used with their kind permission. It was downloaded from the Hayes (2013) website. For an example of how to display the sexism by protest interaction, see the examples in the \code{\link{GSBE}} (Garcia) data set. See the ``how to do mediation and moderation" at personality-project.org/r/psych/HowTo/mediation.pdf as well as the introductory vignette. } \author{William Revelle } \note{ There are a number of other packages that do mediation analysis (e.g., sem and lavaan) and they are probably preferred for more complicated models. This function is supplied for the more basic cases, with 1..k y variables, 1..n x variables, 1 ..j mediators and 1 ..z variables to partial. The number of moderated effects is not limited, but more than 3rd order interactions are not very meaningful. It will not do two step mediation. The current version will not correctly handle more than one DV. } \seealso{\code{\link{lmCor}} and \code{\link{lmCor.diagram}} for regression and moderation, \code{\link{Garcia}} for further demonstrations of mediation and moderation. } \examples{ # A simple mediation example is the Tal_Or data set (pmi for Hayes) #The pmi data set from Hayes is available as the Tal_Or data set. mod4 <- mediate(reaction ~ cond + (pmi), data =Tal_Or,n.iter=50) summary(mod4) #Two mediators (from Hayes model 6 (chapter 5)) mod6 <- mediate(reaction ~ cond + (pmi) + (import), data =Tal_Or,n.iter=50) summary(mod6) #Moderated mediation is done for the Garcia (Garcia, 2010) data set. # (see Hayes, 2013 for the protest data set #n.iter set to 50 (instead of default of 5000) for speed of example #no mediation, just an interaction mod7 <- mediate(liking ~ sexism * prot2 , data=Garcia, n.iter = 50) summary(mod7) data(GSBE) #The Garcia et al data set (aka GSBE) mod11.4 <- mediate(liking ~ sexism * prot2 + (respappr), data=Garcia, n.iter = 50,zero=FALSE) #to match Hayes summary(mod11.4) #to see this interaction graphically, run the examples in ?Garcia #data from Preacher and Hayes (2004) sobel <- structure(list(SATIS = c(-0.59, 1.3, 0.02, 0.01, 0.79, -0.35, -0.03, 1.75, -0.8, -1.2, -1.27, 0.7, -1.59, 0.68, -0.39, 1.33, -1.59, 1.34, 0.1, 0.05, 0.66, 0.56, 0.85, 0.88, 0.14, -0.72, 0.84, -1.13, -0.13, 0.2), THERAPY = structure(c(0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0), value.labels = structure(c(1, 0), .Names = c("cognitive", "standard"))), ATTRIB = c(-1.17, 0.04, 0.58, -0.23, 0.62, -0.26, -0.28, 0.52, 0.34, -0.09, -1.09, 1.05, -1.84, -0.95, 0.15, 0.07, -0.1, 2.35, 0.75, 0.49, 0.67, 1.21, 0.31, 1.97, -0.94, 0.11, -0.54, -0.23, 0.05, -1.07)), .Names = c("SATIS", "THERAPY", "ATTRIB" ), row.names = c(NA, -30L), class = "data.frame", variable.labels = structure(c("Satisfaction", "Therapy", "Attributional Positivity"), .Names = c("SATIS", "THERAPY", "ATTRIB"))) #n.iter set to 50 (instead of default of 5000) for speed of example #There are several forms of input. The original specified y, x , and the mediator #mediate(1,2,3,sobel,n.iter=50) #The example in Preacher and Hayes #As of October, 2017 we can specify this in a formula mode mediate (SATIS ~ THERAPY + (ATTRIB),data=sobel, n.iter=50) #specify the mediator by # adding parentheses #A.C. Kerchoff, (1974) Ambition and Attainment: A Study of Four Samples of American Boys. #Data from sem package taken from Kerckhoff (and in turn, from Lisrel manual) R.kerch <- structure(list(Intelligence = c(1, -0.1, 0.277, 0.25, 0.572, 0.489, 0.335), Siblings = c(-0.1, 1, -0.152, -0.108, -0.105, -0.213, -0.153), FatherEd = c(0.277, -0.152, 1, 0.611, 0.294, 0.446, 0.303), FatherOcc = c(0.25, -0.108, 0.611, 1, 0.248, 0.41, 0.331), Grades = c(0.572, -0.105, 0.294, 0.248, 1, 0.597, 0.478 ), EducExp = c(0.489, -0.213, 0.446, 0.41, 0.597, 1, 0.651), OccupAsp = c(0.335, -0.153, 0.303, 0.331, 0.478, 0.651, 1 )), .Names = c("Intelligence", "Siblings", "FatherEd", "FatherOcc", "Grades", "EducExp", "OccupAsp"), class = "data.frame", row.names = c("Intelligence", "Siblings", "FatherEd", "FatherOcc", "Grades", "EducExp", "OccupAsp" )) #n.iter set to 50 (instead of default of 5000) for speed of demo #mod.k <- mediate("OccupAsp","Intelligence",m= c(2:5),data=R.kerch,n.obs=767,n.iter=50) #new style mod.k <- mediate(OccupAsp ~ Intelligence + (Siblings) + (FatherEd) + (FatherOcc) + (Grades), data = R.kerch, n.obs=767, n.iter=50) mediate.diagram(mod.k) #print the path values mod.k #Compare the following solution to the path coefficients found by the sem package #mod.k2 <- mediate(y="OccupAsp",x=c("Intelligence","Siblings","FatherEd","FatherOcc"), # m= c(5:6),data=R.kerch,n.obs=767,n.iter=50) #new format mod.k2 <- mediate(OccupAsp ~ Intelligence + Siblings + FatherEd + FatherOcc + (Grades) + (EducExp),data=R.kerch, n.obs=767, n.iter=50) mediate.diagram(mod.k2,show.c=FALSE) #simpler output #print the path values mod.k2 #Several interesting test cases are taken from analyses of the Spengler data set #This is temporarily added to psych from psychTools to help build for CRAN #Although the sample sizes are actually very large in the first wave, I use the #sample sizes from the last wave #We set the n.iter to be 50 instead of the default value of 5,000 if(require("psychTools")) { mod1 <- mediate(Income.50 ~ IQ + Parental+ (Ed.11) ,data=Spengler, n.obs = 1952, n.iter=50) mod2 <- mediate(Income.50 ~ IQ + Parental+ (Ed.11) + (Income.11) ,data=Spengler,n.obs = 1952, n.iter=50) #Now, compare these models anova(mod1,mod2) #Current version does not support two DVs #mod22 <- mediate(Income.50 + Educ.50 ~ IQ + Parental+ (Ed.11) + (Income.11) # ,data=Spengler,n.obs = 1952, n.iter=50) } } \keyword{ multivariate }% at least one, from doc/KEYWORDS \keyword{ models }% __ONLY ONE__ keyword per line psych/man/draw.tetra.Rd0000644000176200001440000000425412436644265014536 0ustar liggesusers\name{draw.tetra} \alias{draw.tetra} \alias{draw.cor} \title{Draw a correlation ellipse and two normal curves to demonstrate tetrachoric correlation} \description{A graphic of a correlation ellipse divided into 4 regions based upon x and y cutpoints on two normal distributions. This is also an example of using the layout function. Draw a bivariate density plot to show how tetrachorics work. } \usage{ draw.tetra(r, t1, t2,shade=TRUE) draw.cor(r=.5,expand=10,theta=30,phi=30,N=101,nbcol=30,box=TRUE, main="Bivariate density rho = ",cuts=NULL,all=TRUE,ellipses=TRUE,ze=.15) } \arguments{ \item{r}{the underlying Pearson correlation defines the shape of the ellipse} \item{t1}{X is cut at tau} \item{t2}{Y is cut at Tau} \item{shade}{shade the diagram (default is TRUE)} \item{expand}{The relative height of the z axis} \item{theta}{The angle to rotate the x-y plane} \item{phi}{The angle above the plane to view the graph} \item{N}{The grid resolution} \item{nbcol}{The color resolution} \item{box}{Draw the axes} \item{main}{The main title} \item{cuts}{Should the graphic show cuts (e.g., cuts=c(0,0))} \item{all}{Show all four parts of the tetrachoric} \item{ellipses}{Draw a correlation ellipse} \item{ze}{height of the ellipse if requested} } \details{ A graphic demonstration of the \code{\link{tetrachoric}} correlation. Used for teaching purposes. The default values are for a correlation of .5 with cuts at 1 and 1. Any other values are possible. The code is also a demonstration of how to use the \code{\link{layout}} function for complex graphics using base graphics. } \author{ William Revelle } \seealso{\code{\link{tetrachoric}} to find tetrachoric correlations, \code{\link{irt.fa}} and \code{\link{fa.poly}} to use them in factor analyses, \code{\link{scatter.hist}} to show correlations and histograms. } \examples{ #if(require(mvtnorm)) { #draw.tetra(.5,1,1) #draw.tetra(.8,2,1)} else {print("draw.tetra requires the mvtnorm package") #draw.cor(.5,cuts=c(0,0))} draw.tetra(.5,1,1) draw.tetra(.8,2,1) draw.cor(.5,cuts=c(0,0)) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{multivariate } \keyword{hplot}psych/man/cluster2keys.Rd0000644000176200001440000000254711311321734015105 0ustar liggesusers\name{cluster2keys} \alias{cluster2keys} \title{Convert a cluster vector (from e.g., kmeans) to a keys matrix suitable for scoring item clusters. } \description{The output of the kmeans clustering function produces a vector of cluster membership. The \code{\link{score.items}} and \code{\link{cluster.cor}} functions require a matrix of keys. cluster2keys does this. May also be used to take the output of an \code{\link{ICLUST}} analysis and find a keys matrix. (By doing a call to the \code{\link{factor2cluster}} function. } \usage{ cluster2keys(c) } \arguments{ \item{c}{A vector of cluster assignments or an object of class ``kmeans" that contains a vector of clusters. } } \details{Note that because kmeans will not reverse score items, the clusters defined by kmeans will not necessarily match those of ICLUST with the same number of clusters extracted. } \value{ \item{keys}{A matrix of keys suitable for score.items or cluster.cor} } \author{William Revelle} \seealso{ \code{\link{cluster.cor}},\code{\link{score.items}}, \code{\link{factor2cluster}}, \code{\link{make.keys}}} \examples{ test.data <- Harman74.cor$cov kc <- kmeans(test.data,4) keys <- cluster2keys(kc) keys #these match those found by ICLUST cluster.cor(keys,test.data) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{multivariate } psych/man/interp.median.Rd0000644000176200001440000000633413464313327015213 0ustar liggesusers\name{interp.median} \alias{interp.median} \alias{interp.quantiles} \alias{interp.quartiles} \alias{interp.boxplot} \alias{interp.values} \alias{interp.qplot.by} \alias{interp.q} \alias{interp.quart} \title{Find the interpolated sample median, quartiles, or specific quantiles for a vector, matrix, or data frame} \description{For data with a limited number of response categories (e.g., attitude items), it is useful treat each response category as range with width, w and linearly interpolate the median, quartiles, or any quantile value within the median response. } \usage{ interp.median(x, w = 1,na.rm=TRUE) interp.quantiles(x, q = .5, w = 1,na.rm=TRUE) interp.quartiles(x,w=1,na.rm=TRUE) interp.boxplot(x,w=1,na.rm=TRUE) interp.values(x,w=1,na.rm=TRUE) interp.qplot.by(y,x,w=1,na.rm=TRUE,xlab="group",ylab="dependent", ylim=NULL,arrow.len=.05,typ="b",add=FALSE,...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{input vector } \item{q}{quantile to estimate ( 0 < q < 1} \item{w}{category width} \item{y}{input vector for interp.qplot.by} \item{na.rm}{should missing values be removed} \item{xlab}{x label} \item{ylab}{Y label} \item{ylim}{limits for the y axis} \item{arrow.len}{length of arrow in interp.qplot.by} \item{typ}{plot type in interp.qplot.by} \item{add}{add the plot or not} \item{...}{additional parameters to plotting function} } \details{If the total number of responses is N, with median, M, and the number of responses at the median value, Nm >1, and Nb= the number of responses less than the median, then with the assumption that the responses are distributed uniformly within the category, the interpolated median is M - .5w + w*(N/2 - Nb)/Nm. The generalization to 1st, 2nd and 3rd quartiles as well as the general quantiles is straightforward. A somewhat different generalization allows for graphic presentation of the difference between interpolated and non-interpolated points. This uses the interp.values function. If the input is a matrix or data frame, quantiles are reported for each variable. } \value{ \item{im}{interpolated median(quantile)} \item{v}{interpolated values for all data points} } \seealso{ \code{\link{median}}} \examples{ interp.median(c(1,2,3,3,3)) # compare with median = 3 interp.median(c(1,2,2,5)) interp.quantiles(c(1,2,2,5),.25) x <- sample(10,100,TRUE) interp.quartiles(x) # x <- c(1,1,2,2,2,3,3,3,3,4,5,1,1,1,2,2,3,3,3,3,4,5,1,1,1,2,2,3,3,3,3,4,2) y <- c(1,2,3,3,3,3,4,4,4,4,4,1,2,3,3,3,3,4,4,4,4,5,1,5,3,3,3,3,4,4,4,4,4) x <- x[order(x)] #sort the data by ascending order to make it clearer y <- y[order(y)] xv <- interp.values(x) yv <- interp.values(y) barplot(x,space=0,xlab="ordinal position",ylab="value") lines(1:length(x)-.5,xv) points(c(length(x)/4,length(x)/2,3*length(x)/4),interp.quartiles(x)) barplot(y,space=0,xlab="ordinal position",ylab="value") lines(1:length(y)-.5,yv) points(c(length(y)/4,length(y)/2,3*length(y)/4),interp.quartiles(y)) data(psychTools::galton) galton <- psychTools::galton interp.median(galton) interp.qplot.by(galton$child,galton$parent,ylab="child height" ,xlab="Mid parent height") } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{univar} psych/man/polar.Rd0000644000176200001440000000461012262077744013573 0ustar liggesusers\name{polar} \alias{polar} \title{Convert Cartesian factor loadings into polar coordinates } \description{Factor and cluster analysis output typically presents item by factor correlations (loadings). Tables of factor loadings are frequently sorted by the size of loadings. This style of presentation tends to make it difficult to notice the pattern of loadings on other, secondary, dimensions. By converting to polar coordinates, it is easier to see the pattern of the secondary loadings. } \usage{ polar(f, sort = TRUE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{f}{A matrix of loadings or the output from a factor or cluster analysis program} \item{sort}{ sort=TRUE: sort items by the angle of the items on the first pair of factors.} } \details{Although many uses of factor analysis/cluster analysis assume a simple structure where items have one and only one large loading, some domains such as personality or affect items have a more complex structure and some items have high loadings on two factors. (These items are said to have complexity 2, see \code{\link{VSS}}). By expressing the factor loadings in polar coordinates, this structure is more readily perceived. For each pair of factors, item loadings are converted to an angle with the first factor, and a vector length corresponding to the amount of variance in the item shared with the two factors. For a two dimensional structure, this will lead to a column of angles and a column of vector lengths. For n factors, this leads to n* (n-1)/2 columns of angles and an equivalent number of vector lengths. } \value{ \item{polar }{A data frame of polar coordinates } } \references{Rafaeli, E. & Revelle, W. (2006). A premature consensus: Are happiness and sadness truly opposite affects? Motivation and Emotion. \\ Hofstee, W. K. B., de Raad, B., & Goldberg, L. R. (1992). Integration of the big five and circumplex approaches to trait structure. Journal of Personality and Social Psychology, 63, 146-163.} \author{William Revelle} \seealso{ \code{\link{ICLUST}}, \code{\link{cluster.plot}}, \code{\link{circ.tests}}, \code{\link{fa}} } \examples{ circ.data <- circ.sim(24,500) circ.fa <- fa(circ.data,2) circ.polar <- round(polar(circ.fa),2) circ.polar #compare to the graphic cluster.plot(circ.fa) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{ multivariate } psych/man/cluster.fit.Rd0000644000176200001440000000535513256544621014724 0ustar liggesusers\name{cluster.fit} \alias{cluster.fit} \title{ cluster Fit: fit of the cluster model to a correlation matrix } \description{ How well does the cluster model found by \code{\link{ICLUST}} fit the original correlation matrix? A similar algorithm \code{\link{factor.fit}} is found in \code{\link{VSS}}. This function is internal to ICLUST but has more general use as well. In general, the cluster model is a Very Simple Structure model of complexity one. That is, every item is assumed to represent only one factor/cluster. Cluster fit is an analysis of how well this model reproduces a correlation matrix. Two measures of fit are given: cluster fit and factor fit. Cluster fit assumes that variables that define different clusters are orthogonal. Factor fit takes the loadings generated by a cluster model, finds the cluster loadings on all clusters, and measures the degree of fit of this somewhat more complicated model. Because the cluster loadings are similar to, but not identical to factor loadings, the factor fits found here and by \code{\link{factor.fit}} will be similar. } \usage{ cluster.fit(original, load, clusters, diagonal = FALSE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{original}{The original correlation matrix being fit } \item{load}{ Cluster loadings -- that is, the correlation of individual items with the clusters, corrected for item overlap } \item{clusters}{The cluster structure } \item{diagonal}{ Should we fit the diagonal as well? } } \details{The cluster model is similar to the factor model: R is fitted by C'C. Where C <- Cluster definition matrix x the loading matrix. How well does this model approximate the original correlation matrix and how does this compare to a factor model? The fit statistic is a comparison of the original (squared) correlations to the residual correlations. Fit = 1 - r*2/r2 where r* is the residual correlation of data - model and model = C'C. } \value{ \item{clusterfit }{The cluster model is a reduced form of the factor loading matrix. That is, it is the product of the elements of the cluster matrix * the loading matrix. } \item{factorfit }{How well does the complete loading matrix reproduce the correlation matrix?} } \references{ \url{https://personality-project.org/r/r.ICLUST.html} } \author{ Maintainer: William Revelle \email{revelle@northwestern.edu} } \seealso{ \code{\link{VSS}}, \code{\link{ICLUST}}, \code{\link{factor2cluster}}, \code{\link{cluster.cor}}, \code{\link{factor.fit}}} \examples{ r.mat<- Harman74.cor$cov iq.clus <- ICLUST(r.mat,nclusters =2) fit <- cluster.fit(r.mat,iq.clus$loadings,iq.clus$clusters) fit } \keyword{ multivariate }% at least one, from doc/KEYWORDS \keyword{ cluster }% __ONLY ONE__ keyword per line psych/man/score.alpha.Rd0000644000176200001440000000654513715123436014660 0ustar liggesusers\name{score.alpha} \alias{score.alpha} \title{ Score scales and find Cronbach's alpha as well as associated statistics} \description{Given a matrix or data.frame of k keys for m items (-1, 0, 1), and a matrix or data.frame of items scores for m items and n people, find the sum scores or average scores for each person and each scale. In addition, report Cronbach's alpha, the average r, the scale intercorrelations, and the item by scale correlations. (Superseded by \code{\link{score.items}}). } \usage{ score.alpha(keys, items, labels = NULL, totals=TRUE,digits = 2) #deprecated } %- maybe also 'usage' for other objects documented here. \arguments{ \item{keys}{ A matrix or dataframe of -1, 0, or 1 weights for each item on each scale } \item{items}{Data frame or matrix of raw item scores } \item{labels}{column names for the resulting scales} \item{totals}{Find sum scores (default) or average score} \item{digits}{Number of digits for answer (default =2) } } \details{This function has been replaced with \code{\link{scoreItems}} (for multiple scales) and \code{\link{alpha}} for single scales. The process of finding sum or average scores for a set of scales given a larger set of items is a typical problem in psychometric research. Although the structure of scales can be determined from the item intercorrelations, to find scale means, variances, and do further analyses, it is typical to find the sum or the average scale score. Various estimates of scale reliability include ``Cronbach's alpha", and the average interitem correlation. For k = number of items in a scale, and av.r = average correlation between items in the scale, alpha = k * av.r/(1+ (k-1)*av.r). Thus, alpha is an increasing function of test length as well as the test homeogeneity. Alpha is a poor estimate of the general factor saturation of a test (see Zinbarg et al., 2005) for it can seriously overestimate the size of a general factor, and a better but not perfect estimate of total test reliability because it underestimates total reliability. None the less, it is a useful statistic to report. } \value{ \item{scores }{Sum or average scores for each subject on the k scales} \item{alpha }{Cronbach's coefficient alpha. A simple (but non-optimal) measure of the internal consistency of a test. See also beta and omega.} \item{av.r}{The average correlation within a scale, also known as alpha 1 is a useful index of the internal consistency of a domain.} \item{n.items}{Number of items on each scale} \item{cor}{The intercorrelation of all the scales} \item{item.cor}{The correlation of each item with each scale. Because this is not corrected for item overlap, it will overestimate the amount that an item correlates with the other items in a scale.} } \references{An introduction to psychometric theory with applications in R (in preparation). \url{https://personality-project.org/r/book/}} \author{ William Revelle } \seealso{ \code{\link{score.items}}, \code{\link{alpha}}, \code{\link{correct.cor}}, \code{\link{cluster.loadings}}, \code{\link{omega}} } \examples{ y <- attitude #from the datasets package keys <- matrix(c(rep(1,7),rep(1,4),rep(0,7),rep(-1,3)),ncol=3) labels <- c("first","second","third") x <- score.alpha(keys,y,labels) #deprecated } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{multivariate } \keyword{models} psych/man/faMulti.Rd0000644000176200001440000001571614471532013014055 0ustar liggesusers\name{fa.multi} \alias{fa.multi} \alias{fa.multi.diagram} %- Also NEED an '\alias' for EACH other topic documented here. \title{Multi level (hierarchical) factor analysis } \description{Some factor analytic solutions produce correlated factors which may in turn be factored. If the solution has one higher order, the omega function is most appropriate. But, in the case of multi higher order factors, then the faMulti function will do a lower level factoring and then factor the resulting correlation matrix. Multi level factor diagrams are also shown. } \usage{ fa.multi(r, nfactors = 3, nfact2 = 1, n.obs = NA, n.iter = 1, rotate = "oblimin", scores = "regression", residuals = FALSE, SMC = TRUE, covar = FALSE, missing = FALSE,impute = "median", min.err = 0.001, max.iter = 50, symmetric = TRUE, warnings =TRUE, fm = "minres", alpha = 0.1, p = 0.05, oblique.scores = FALSE, np.obs = NULL, use ="pairwise", cor = "cor", ...) fa.multi.diagram(multi.results,sort=TRUE,labels=NULL,flabels=NULL,f2labels=NULL,cut=.2, gcut=.2, simple=TRUE,errors=FALSE, digits=1,e.size=.1,rsize=.15,side=3,main=NULL,cex=NULL,color.lines=TRUE ,marg=c(.5,.5,1.5,.5),adj=2, ...) } \arguments{The arguments match those of the fa function. \item{r}{ A correlation matrix or raw data matrix } \item{nfactors}{The desired number of factors for the lower level } \item{nfact2}{The desired number of factors for the higher level } \item{n.obs}{Number of observations used to find the correlation matrix if using a correlation matrix. Used for finding the goodness of fit statistics. Must be specified if using a correlaton matrix and finding confidence intervals.} \item{np.obs}{The pairwise number of observations. Used if using a correlation matrix and asking for a minchi solution.} \item{rotate}{"none", "varimax", "quartimax", "bentlerT", "equamax", "varimin", "geominT" and "bifactor" are orthogonal rotations. "promax", "oblimin", "simplimax", "bentlerQ, "geominQ" and "biquartimin" and "cluster" are possible oblique transformations of the solution. The default is to do a oblimin transformation, although versions prior to 2009 defaulted to varimax.} \item{n.iter}{Number of bootstrap interations to do in fa or fa.poly} \item{residuals}{Should the residual matrix be shown } \item{scores}{the default="regression" finds factor scores using regression. Alternatives for estimating factor scores include simple regression ("Thurstone"), correlaton preserving ("tenBerge") as well as "Anderson" and "Bartlett" using the appropriate algorithms (see factor.scores). Although scores="tenBerge" is probably preferred for most solutions, it will lead to problems with some improper correlation matrices. } \item{SMC}{Use squared multiple correlations (SMC=TRUE) or use 1 as initial communality estimate. Try using 1 if imaginary eigen values are reported. If SMC is a vector of length the number of variables, then these values are used as starting values in the case of fm='pa'. } \item{covar}{if covar is TRUE, factor the covariance matrix, otherwise factor the correlation matrix} \item{missing}{if scores are TRUE, and missing=TRUE, then impute missing values using either the median or the mean} \item{impute}{"median" or "mean" values are used to replace missing values} \item{min.err}{Iterate until the change in communalities is less than min.err} \item{max.iter}{Maximum number of iterations for convergence } \item{symmetric}{symmetric=TRUE forces symmetry by just looking at the lower off diagonal values} \item{warnings}{warnings=TRUE => warn if number of factors is too many } \item{fm}{factoring method fm="minres" will do a minimum residual (OLS), fm="wls" will do a weighted least squares (WLS) solution, fm="gls" does a generalized weighted least squares (GLS), fm="pa" will do the principal factor solution, fm="ml" will do a maximum likelihood factor analysis. fm="minchi" will minimize the sample size weighted chi square when treating pairwise correlations with different number of subjects per pair.} \item{alpha}{alpha level for the confidence intervals for RMSEA} \item{p}{if doing iterations to find confidence intervals, what probability values should be found for the confidence intervals} \item{oblique.scores}{When factor scores are found, should they be based on the structure matrix (default) or the pattern matrix (oblique.scores=TRUE). } \item{use}{How to treat missing data, use="pairwise" is the default". See cor for other options.} \item{cor}{How to find the correlations: "cor" is Pearson", "cov" is covariance, "tet" is tetrachoric, "poly" is polychoric, "mixed" uses mixed cor for a mixture of tetrachorics, polychorics, Pearsons, biserials, and polyserials, Yuleb is Yulebonett, Yuleq and YuleY are the obvious Yule coefficients as appropriate} \item{multi.results}{The results from fa.multi} \item{labels}{ variable labels } \item{flabels}{Labels for the factors (not counting g)} \item{f2labels}{The labels for the second order factors} \item{size}{size of graphics window } \item{digits}{ Precision of labels } \item{cex}{control font size} \item{color.lines}{Use black for positive, red for negative} \item{marg}{The margins for the figure are set to be wider than normal by default} \item{adj}{Adjust the location of the factor loadings to vary as factor mod 4 + 1} \item{main}{ main figure caption } \item{\dots}{additional parameters, specifically, keys may be passed if using the target rotation, or delta if using geominQ, or whether to normalize if using Varimax. In addition, for fa.multi.diagram, other options to pass into the graphics packages } \item{e.size}{the size to draw the ellipses for the factors. This is scaled by the number of variables.} \item{cut}{Minimum path coefficient to draw} \item{gcut}{Minimum general factor path to draw} \item{simple}{draw just one path per item} \item{sort}{sort the solution before making the diagram} \item{side}{on which side should errors be drawn?} \item{errors}{show the error estimates} \item{rsize}{size of the rectangles} } \details{ See \code{\link{fa}} and \code{\link{omega}} for a discussion of factor analysis and of the case of one higher order factor. } \value{ \item{f1}{The standard output from a factor analysis from \code{\link{fa}} for the raw variables} \item{f2}{The standard output from a factor analysis from \code{\link{fa}} for the correlation matrix of the level 1 solution. } } \references{ Revelle, William. (in prep) An introduction to psychometric theory with applications in R. Springer. Working draft available at \url{https://personality-project.org/r/book/} } \author{ William Revelle } \note{ This is clearly an early implementation (Feb 14 2016) which might be improved. } \seealso{ \code{\link{fa}}, \code{\link{omega}} } \examples{ f31 <- fa.multi(Thurstone,3,1) #compare with \code{\link{omega}} f31 fa.multi.diagram(f31) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{ multivariate } \keyword{ models} psych/man/faCor.Rd0000644000176200001440000001170613463346446013516 0ustar liggesusers\name{faCor} \alias{faCor} \title{Correlations between two factor analysis solutions} \description{ Given two factor analysis or pca solutions to a data matrix or correlation, what are the similarities between the two solutions. This may be found by factor correlations as well as factor congruences. Factor correlations are found by the matrix product of the factor weights and the correlation matrix and are estimates of what the factor score correlations would be. Factor congruence (aka Tucker or Burt coefficient) is the cosine of the vectors of factor loadings. } \usage{ faCor(r, nfactors = c(1, 1), fm = c("minres", "minres"), rotate = c("oblimin", "oblimin"), scores = c("tenBerge", "tenBerge"), adjust=c(TRUE,TRUE), use = "pairwise", cor = "cor", weight = NULL, correct = 0.5,Target=list(NULL,NULL)) } \arguments{ \item{r}{A correlation matrix or a data matrix suitable for factoring} \item{nfactors}{Number of factors in each solution to extract} \item{fm}{Factor method. The default is 'minres' factoring. To compare with pca solutions, can also be (fm ="pca") } \item{rotate}{What type of rotations to apply. The default for factors is oblimin, for pca is varimax.} \item{scores}{What factor scoring algorithm should be used. Defaults to tenBerge for both cases.} \item{adjust}{Should the factor intercorrelations be corrected by the lack of factor deteriminancy (i.e. divide by the square root of the factor R2)} \item{use}{How to treat missing data. Use='pairwise" finds pairwise complete correlations. } \item{cor}{What kind of correlation to find. The default is Pearson. } \item{weight}{Should cases be weighted? Default, no.} \item{correct}{If finding tetrachoric or polychoric correlations, what correction should be applied to empty cells (defaults to .5)} \item{Target}{If doing target rotations (e.g., TargetQ or TargetT), then the Target must be specified. If TargetT, this may be a matrix, if TargetQ, this must be a list. (Strange property of GPARotation.)} } \details{The factor correlations are found using the approach discussed by Gorsuch (1983) and uses the weights matrices found by \eqn{w=S R^{-1}} and \eqn{r = w' R w} where S is the structure matrix and is \eqn{s= F \Phi}. The resulting correlations may be adjusted for the factor score variances (the diagonal of r) (the default). For factor loading vectors of F1 and F2 the measure of factor congruence, phi, is \deqn{ \phi = \frac{\sum F_1 F_2}{\sqrt{\sum(F_1^2)\sum(F_2^2)}} .}{{phi = sum(F1 F2)/sqrt(sum(F1^2) sum(F2^2))} } and is also found in \code{\link{factor.congruence}}. For comparisons of factor solutions from 1 to n, use \code{\link{bassAckward}}. This function just compares two solutions from the same correlation/data matrix. \code{\link{factor.congruence}} can be used to compare any two sets of factor loadings. Note that alternative ways of finding weights (e.g., regression, Bartlett, tenBerge) will produce somewhat different results. tenBerge produces weights that maintain the factor correlations in the factor scores. } \value{ \item{Call}{The function call} \item{r}{The factor intercorrelations} \item{congruence}{The Burt/Tucker coefficient of congruence} \item{f1}{The first factor analysis} \item{f2}{The second factor analysis} } \references{ Gorsuch, Richard, (1983) Factor Analysis. Lawrence Erlebaum Associates. Burt, Cyril (1948) The factorial study of temperamental traits. British Journal of Statistical Psychology, 1(3) 178-203. Horn, John L. (1973) On extension analysis and its relation to correlations between variables and factor scores. Multivariate Behavioral Research, 8, (4), 477-489. Lorenzo-Seva, U. and ten Berge, J. M. F. (2006). Tucker's congruence coefficient as a meaningful index of factor similarity. Methodology: European Journal of Research Methods for the Behavioral and Social Sciences, 2(2):57-64. Revelle, William. (in prep) An introduction to psychometric theory with applications in R. Springer. Working draft available at \url{https://personality-project.org/r/book/} } \author{ William Revelle } \note{Useful for comparing factor solutions from the same data. Will not work for different data sets } \seealso{ \code{\link{fa}}, \code{\link{pca}}, \code{\link{omega}} and \code{\link{iclust}}, and \code{\{link{bassAckward}} for alternative hierarchical solutions. \code{\link{fa.extend}} and \code{\link{fa.extension}} for other uses of factor - item correlations. } \examples{ faCor(Thurstone,nfactors=c(2,3)) #compare two solutions to the Thurstone problem faCor(psychTools::bfi[1:25],nfactors=c(5,5),fm=c("minres","pca")) #compare pca and fa solutions #compare two levels of factor extraction, and then find the correlations of the scores faCor(psychTools::bfi[1:25],nfactors=c(3,5)) #based upon linear algebra f3 <- fa(psychTools::bfi[1:25],3,scores="tenBerge") f5 <- fa(psychTools::bfi[1:25],5 ,scores="tenBerge") cor2(f3$scores,f5$scores) #the correlation between the factor score estimates } \keyword{multivariate } \keyword{ models}psych/man/factor2cluster.Rd0000644000176200001440000000453113564366325015424 0ustar liggesusers\name{factor2cluster} \alias{factor2cluster} \title{ Extract cluster definitions from factor loadings } \description{Given a factor or principal components loading matrix, assign each item to a cluster corresponding to the largest (signed) factor loading for that item. Essentially, this is a Very Simple Structure approach to cluster definition that corresponds to what most people actually do: highlight the largest loading for each item and ignore the rest. } \usage{ factor2cluster(loads, cut = 0,aslist=FALSE) } \arguments{ \item{loads}{either a matrix of loadings, or the result of a factor analysis/principal components analyis with a loading component } \item{cut}{Extract items with absolute loadings > cut} \item{aslist}{if TRUE, Return a keys list, else return a keys matrix (old style) } } \details{A factor/principal components analysis loading matrix is converted to a cluster (-1,0,1) definition matrix where each item is assigned to one and only one cluster. This is a fast way to extract items that will be unit weighted to form cluster composites. Use this function in combination with cluster.cor to find the corrleations of these composite scores. A typical use in the SAPA project is to form item composites by clustering or factoring (see \code{\link{ICLUST}}, \code{\link{principal}}), extract the clusters from these results (\code{\link{factor2cluster}}), and then form the composite correlation matrix using \code{\link{cluster.cor}}. The variables in this reduced matrix may then be used in multiple R procedures using mat.regress. The input may be a matrix of item loadings, or the output from a factor analysis which includes a loadings matrix. } \value{a keys list (new style or a matrix of -1,0,1 cluster definitions for each item. } \references{ \url{https://personality-project.org/r/r.vss.html} } \author{ \url{https://personality-project.org/revelle.html} \cr Maintainer: William Revelle \email{ revelle@northwestern.edu } } \seealso{\code{\link{cluster.cor}}, \code{\link{factor2cluster}}, \code{\link{fa}}, \code{\link{principal}}, \code{\link{ICLUST}} \code{\link{make.keys}}, \code{\link{keys2list}} } \examples{ #matches the factanal example f4 <- fa(Harman74.cor$cov,4,rotate="varimax") factor2cluster(f4) } \keyword{ multivariate }% at least one, from doc/KEYWORDS \keyword{ models }% __ONLY ONE__ keyword per line psych/man/scaling.fits.Rd0000644000176200001440000000317013715123360015030 0ustar liggesusers\name{scaling.fits} \alias{scaling.fits} \title{ Test the adequacy of simple choice, logistic, or Thurstonian scaling. } \description{Given a matrix of choices and a vector of scale values, how well do the scale values capture the choices? That is, what is size of the squared residuals given the model versus the size of the squared choice values? } \usage{ scaling.fits(model, data, test = "logit", digits = 2, rowwise = TRUE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{model}{A vector of scale values } \item{data}{ A matrix or dataframe of choice frequencies } \item{test}{ "choice", "logistic", "normal" } \item{digits}{ Precision of answer } \item{rowwise}{Are the choices ordered by column over row (TRUE) or row over column False)} } \details{How well does a model fit the data is the classic problem of all of statistics. One fit statistic for scaling is the just the size of the residual matrix compared to the original estimates. } \value{ \item{GF }{Goodness of fit of the model} \item{original }{Sum of squares for original data} \item{resid }{Sum of squares for residuals given the data and the model} \item{residual }{Residual matrix} } \references{Revelle, W. (in preparation) Introduction to psychometric theory with applications in R, Springer. \url{https://personality-project.org/r/book/}} \author{ William Revelle } \note{ Mainly for demonstration purposes for a course on psychometrics } \seealso{ \code{\link{thurstone}}, \code{\link[psychTools]{vegetables}}} % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{ models} psych/man/alpha.Rd0000644000176200001440000003755214533770467013563 0ustar liggesusers\name{alpha} \alias{alpha} \alias{alpha.scale} \alias{alpha.ci} \alias{alpha2r} \title{Find two estimates of reliability: Cronbach's alpha and Guttman's Lambda 6. } \description{ Internal consistency measures of reliability range from \eqn{\omega_h}{omega_hierchical} to \eqn{\alpha}{alpha} to \eqn{\omega_t}{omega_total}. This function reports two estimates: Cronbach's coefficient \eqn{\alpha}{alpha} and Guttman's \eqn{\lambda_6}{lambda_6}. Also reported are item - whole correlations, \eqn{\alpha}{alpha} if an item is omitted, and item means and standard deviations. } \usage{ alpha(x, keys=NULL,cumulative=FALSE, title=NULL, max=10,na.rm = TRUE, check.keys=FALSE,n.iter=1,delete=TRUE,use="pairwise",warnings=TRUE, n.obs=NULL,impute=NULL, discrete=TRUE ) alpha.ci(alpha,n.obs,n.var=NULL,p.val=.05,digits=2) #returns an invisible object alpha2r(alpha, n.var) } \arguments{ \item{x}{A data.frame or matrix of data, or a covariance or correlation matrix } \item{keys}{If some items are to be reversed keyed, then either specify the direction of all items or just a vector of which items to reverse } \item{title}{Any text string to identify this run} \item{cumulative}{should means reflect the sum of items or the mean of the items. The default value is means.} \item{max}{the number of categories/item to consider if reporting category frequencies. Defaults to 10, passed to \code{link{response.frequencies}} } \item{na.rm}{The default is to remove missing values and find pairwise correlations} \item{check.keys}{if TRUE, then find the first principal component and reverse key items with negative loadings. Give a warning if this happens. } \item{n.iter}{Number of iterations if bootstrapped confidence intervals are desired} \item{delete}{Delete items with no variance and issue a warning} \item{use}{Options to pass to the cor function: "everything", "all.obs", "complete.obs", "na.or.complete", or "pairwise.complete.obs". The default is "pairwise"} \item{warnings}{By default print a warning and a message that items were reversed. Suppress the message if warnings = FALSE} \item{alpha}{The value to use for confidence intervals} \item{n.obs}{If using correlation matrices as input, by specify the number of observations, we can find confidence intervals} \item{impute}{How should we impute missing data? Not at all, medians, or means} \item{discrete}{If TRUE, then report frequencies by categories.} \item{n.var}{Number of items in the scale (to find r.bar)} \item{p.val}{width of confidence interval (pval/2 to 1-p.val/2) } \item{digits}{How many digits to use for alpha.ci} } \details{Alpha is one of several estimates of the internal consistency reliability of a test. Surprisingly, more than a century after Spearman (1904) introduced the concept of reliability to psychologists, there are still multiple approaches for measuring it. Although very popular, Cronbach's \eqn{\alpha} (1951) underestimates the reliability of a test and over estimates the first factor saturation. \eqn{\alpha}{alpha} (Cronbach, 1951) is the same as Guttman's \eqn{\lambda}{lambda}3 (Guttman, 1945) and may be found by \deqn{ \lambda_3 = \frac{n}{n-1}\Bigl(1 - \frac{tr(\vec{V})_x}{V_x}\Bigr) = \frac{n}{n-1} \frac{V_x - tr(\vec{V}_x)}{V_x} = \alpha }{Lambda 3 = (n)/(n-1)(1-tr(Vx)/(Vx) = (n)/(n-1)(Vx-tr(Vx)/Vx = alpha} Perhaps because it is so easy to calculate and is available in most commercial programs, alpha is without doubt the most frequently reported measure of internal consistency reliability. Alpha is the mean of all possible spit half reliabilities (corrected for test length). For a unifactorial test, it is a reasonable estimate of the first factor saturation, although if the test has any microstructure (i.e., if it is ``lumpy") coefficients \eqn{\beta}{beta} (Revelle, 1979; see \code{\link{ICLUST}}) and \eqn{\omega_h}{omega_hierchical} (see \code{\link{omega}}) are more appropriate estimates of the general factor saturation. \eqn{\omega_t}{omega_total} (see \code{\link{omega}}) is a better estimate of the reliability of the total test. Guttman's Lambda 6 (G6) considers the amount of variance in each item that can be accounted for the linear regression of all of the other items (the squared multiple correlation or smc), or more precisely, the variance of the errors, \eqn{e_j^2}, and is \deqn{ \lambda_6 = 1 - \frac{\sum e_j^2}{V_x} = 1 - \frac{\sum(1-r_{smc}^2)}{V_x} .}{lambda 6 = 1 - sum(e^2)/Vx = 1-sum(1-r^2(smc))/Vx.} The squared multiple correlation is a lower bound for the item communality and as the number of items increases, becomes a better estimate. G6 is also sensitive to lumpyness in the test and should not be taken as a measure of unifactorial structure. For lumpy tests, it will be greater than alpha. For tests with equal item loadings, alpha > G6, but if the loadings are unequal or if there is a general factor, G6 > alpha. alpha is a generalization of an earlier estimate of reliability for tests with dichotomous items developed by Kuder and Richardson, known as KR20, and a shortcut approximation, KR21. (See Revelle, in prep). Alpha and G6 are both positive functions of the number of items in a test as well as the average intercorrelation of the items in the test. When calculated from the item variances and total test variance, as is done here, raw alpha is sensitive to differences in the item variances. Standardized alpha is based upon the correlations rather than the covariances. A useful index of the quality of the test that is linear with the number of items and the average correlation is the Signal/Noise ratio where \deqn{s/n = \frac{n \bar{r}}{1-\bar{r}}}{s/n = n r/(1-r)} (Cronbach and Gleser, 1964; Revelle and Condon (2019)). More complete reliability analyses of a single scale can be done using the \code{\link{omega}} function which finds \eqn{\omega_h}{omega_hierchical} and \eqn{\omega_t}{omega_total} based upon a hierarchical factor analysis. Alternative functions \code{\link{score.items}} and \code{\link{cluster.cor}} will also score multiple scales and report more useful statistics. ``Standardized" alpha is calculated from the inter-item correlations and will differ from raw alpha. Four alternative item-whole correlations are reported, three are conventional, one unique. raw.r is the correlation of the item with the entire scale, not correcting for item overlap. std.r is the correlation of the item with the entire scale, if each item were standardized. r.drop is the correlation of the item with the scale composed of the remaining items. Although each of these are conventional statistics, they have the disadvantage that a) item overlap inflates the first and b) the scale is different for each item when an item is dropped. Thus, the fourth alternative, r.cor, corrects for the item overlap by subtracting the item variance but then replaces this with the best estimate of common variance, the smc. This is similar to a suggestion by Cureton (1966). If some items are to be reversed keyed then they can be specified by either item name or by item location. (Look at the 3rd and 4th examples.) Automatic reversal can also be done, and this is based upon the sign of the loadings on the first principal component (Example 5). This requires the check.keys option to be TRUE. Previous versions defaulted to have check.keys=TRUE, but some users complained that this made it too easy to find alpha without realizing that some items had been reversed (even though a warning was issued!). Thus, I have set the default to be check.keys=FALSE with a warning that some items need to be reversed (if this is the case). To suppress these warnings, set warnings=FALSE. Scores are based upon the simple averages (or totals) of the items scored. Thus, if some items are missing, the scores reflect just the items answered. This is particularly problematic if using total scores (with the cumulative=TRUE option). To impute missing data using either means or medians, use the \code{\link{scoreItems}} function. Reversed items are subtracted from the maximum + minimum item response for all the items. When using raw data, standard errors for the raw alpha are calculated using equation 2 and 3 from Duhhachek and Iacobucci (2004). This is problematic because some simulations suggest these values are too small. It is probably better to use bootstrapped values. \code{\link{alpha.ci}} finds confidence intervals using the Feldt et al. (1987) procedure. This procedure does not consider the internal structure of the test the way that the Duhachek and Iacobucci (2004) procedure does. That is, the latter considers the variance of the covariances, while the Feldt procedure is based upon just the mean covariance. In March, 2022, alpha.ci was finally fixed to follow the Feldt procedure. The confidence intervals reported by alpha use both the Feld and the Duhaceck and Iabocucci precedures. Note that these match for large values of N, but differ for smaller values. Because both of these procedures use normal theory, if you really care about confidence intervals, using the boot option (n.iter > 1) is recommended. Bootstrapped resamples are found if n.iter > 1. These are returned as the boot object. They may be plotted or described. The 2.5\% and 97.5\% values are returned in the boot.ci object. } \value{ \item{total }{a list containing} \item{raw_alpha}{alpha based upon the covariances} \item{std.alpha}{The standarized alpha based upon the correlations} \item{G6(smc)}{Guttman's Lambda 6 reliability} \item{average_r}{The average interitem correlation} \item{median_r}{The median interitem correlation} \item{mean}{For data matrices, the mean of the scale formed by averaging or summing the items (depending upon the cumulative option)} \item{sd}{For data matrices, the standard deviation of the total score} \item{alpha.drop }{A data frame with all of the above for the case of each item being removed one by one.} \item{item.stats}{A data frame including} \item{n}{number of complete cases for the item} \item{raw.r}{The correlation of each item with the total score, not corrected for item overlap.} \item{std.r}{The correlation of each item with the total score (not corrected for item overlap) if the items were all standardized} \item{r.cor}{Item whole correlation corrected for item overlap and scale reliability} \item{r.drop}{Item whole correlation for this item against the scale without this item} \item{mean}{for data matrices, the mean of each item} \item{sd}{For data matrices, the standard deviation of each item} \item{response.freq}{For data matrices, the frequency of each item response (if less than 20) May be suppressed by specifying discretet=FALSE.} \item{scores}{Scores are by default simply the average response for all items that a participant took. If cumulative=TRUE, then these are sum scores. Note, this is dangerous if there are lots of missing values.} \item{boot.ci}{The lower, median, and upper ranges of the 95\% confidence interval based upon the bootstrap.} \item{boot}{a 6 column by n.iter matrix of boot strapped resampled values} \item{Unidim}{An index of unidimensionality} \item{Fit}{The fit of the off diagonal matrix} } \references{ Cronbach, L.J. (1951) Coefficient alpha and the internal strucuture of tests. Psychometrika, 16, 297-334. Cureton, E. (1966). Corrected item-test correlations. Psychometrika, 31(1):93-96. Cronbach, L.J. and Gleser G.C. (1964)The signal/noise ratio in the comparison of reliability coefficients. Educational and Psychological Measurement, 24 (3) 467-480. Duhachek, A. and Iacobucci, D. (2004). Alpha's standard error (ase): An accurate and precise confidence interval estimate. Journal of Applied Psychology, 89(5):792-808. Feldt, L. S., Woodruff, D. J., & Salih, F. A. (1987). Statistical inference for coefficient alpha. Applied Psychological Measurement (11) 93-103. Guttman, L. (1945). A basis for analyzing test-retest reliability. Psychometrika, 10 (4), 255-282. Revelle, W. (in preparation) An introduction to psychometric theory with applications in {R}. Springer. (Available online at \url{https://personality-project.org/r/book/}). Revelle, W. Hierarchical Cluster Analysis and the Internal Structure of Tests. Multivariate Behavioral Research, 1979, 14, 57-74. Revelle, W. and Condon, D.M. (2019) Reliability from alpha to omega: A tutorial. Psychological Assessment, 31, 12, 1395-1411. https://doi.org/10.1037/pas0000754. \url{https://osf.io/preprints/psyarxiv/2y3w9} Preprint available from PsyArxiv Revelle, W. and Condon, D.M. (2018) Reliability. In Irwing, P., Booth, T. and Hughes, D. (Eds). the Wiley-Blackwell Handbook of Psychometric Testing: A multidisciplinary reference on survey, scale, and test development. Revelle, W. and Zinbarg, R. E. (2009) Coefficients alpha, beta, omega and the glb: comments on Sijtsma. Psychometrika, 74 (1) 1145-154. } \author{William Revelle } \note{By default, items that correlate negatively with the overall scale will be reverse coded. This option may be turned off by setting check.keys = FALSE. If items are reversed, then each item is subtracted from the minimum item response + maximum item response where min and max are taken over all items. Thus, if the items intentionally differ in range, the scores will be off by a constant. See \code{\link{scoreItems}} for a solution. Two item level statistics are worth comparing: the mean interitem r and the median interitem r. If these differ very much, that is a sign that the scale is not particularly homogeneous. Variables without variance do not contribute to reliability but do contribute to total score. They are dropped with a warning that they had no variance and were thus dropped. However the scores found still include these values in the calculations. If the data have been preprocessed by the dplyr package, a strange error can occur. alpha expects either data.frames or matrix input. data.frames returned by dplyr have had three extra classes added to them which causes alpha to break. The solution is merely to change the class of the input to "data.frame". Two experimental measures of Goodness of Fit are returned in the output: Unidim and Fit. They are not printed or displayed, but are available for analysis. The first is an index of how well the modeled average correlations actually reproduce the original correlation matrix. The second is how well the modeled correlations reproduce the off diagonal elements of the matrix. Both are indices of squared residuals compared to the squared original correlations. These two measures are under development and might well be modified or dropped in subsequent versions.} \seealso{ \code{\link{omega}}, \code{\link{ICLUST}}, \code{\link{guttman}}, \code{\link{scoreItems}}, \code{\link{cluster.cor}}} \examples{ set.seed(42) #keep the same starting values #four congeneric measures r4 <- sim.congeneric() alpha(r4) #nine hierarchical measures -- should actually use omega r9 <- sim.hierarchical() alpha(r9) # examples of two independent factors that produce reasonable alphas #this is a case where alpha is a poor indicator of unidimensionality two.f <- sim.item(8) #specify which items to reverse key by name alpha(two.f,keys=c("V3","V4","V5","V6")) cov.two <- cov(two.f) alpha(cov.two,check.keys=TRUE) #automatic reversal base upon first component alpha(two.f,check.keys=TRUE) #note that the median is much less than the average R #this suggests (correctly) that the 1 factor model is probably wrong #an example with discrete item responses -- show the frequencies items <- sim.congeneric(N=500,short=FALSE,low=-2,high=2, categorical=TRUE) #500 responses to 4 discrete items with 5 categories a4 <- alpha(items$observed) #item response analysis of congeneric measures a4 #summary just gives Alpha summary(a4) alpha2r(alpha = .74,n.var=4) #because alpha.ci returns an invisible object, you need to print it print(alpha.ci(.74, 100,p.val=.05,n.var=4)) } \keyword{ models }% at least one, from doc/KEYWORDS \keyword{multivariate }% __ONLY ONE__ keyword per line psych/man/factor.congruence.Rd0000755000176200001440000001370114150023721016047 0ustar liggesusers\name{factor.congruence} \alias{factor.congruence} \alias{fa.congruence} \title{Coefficient of factor congruence } \description{Given two sets of factor loadings, report their degree of congruence (vector cosine). Although first reported by Burt (1937,1 1948), this is frequently known as the Tucker index of factor congruence. Cohen's Profile similarity may be found as well. } \usage{ factor.congruence(x, y=NULL,digits=2,use=NULL,structure=FALSE) fa.congruence(x, y=NULL,digits=2,use=NULL,structure=FALSE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{ A matrix of factor loadings or a list of matrices of factor loadings} \item{y}{ A second matrix of factor loadings (if x is a list, then y may be empty)} \item{digits}{Round off to digits} \item{use}{If NULL, then no loading matrices may contain missing values. If use="complete" then variables with any missing loadings are dropped (with a warning)} \item{structure}{If TRUE, find the factor congruences based upon the Structure matrix (if available), otherwise based upon the pattern matrix.} } \details{Find the coefficient of factor congruence between two sets of factor loadings. Factor congruences are the cosines of pairs of vectors defined by the loadings matrix and based at the origin. Thus, for loadings that differ only by a scaler (e.g. the size of the eigen value), the factor congruences will be 1. For factor loading vectors of F1 and F2 the measure of factor congruence, phi, is \deqn{ \phi = \frac{\sum F_1 F_2}{\sqrt{\sum(F_1^2)\sum(F_2^2)}} .}{phi = sum(F1 F2)/sqrt(sum(F1^2) sum(F2^2)) } It is an interesting exercise to compare factor congruences with the correlations of factor loadings. Factor congruences are based upon the raw cross products, while correlations are based upon centered cross products. That is, correlations of factor loadings are cosines of the vectors based at the mean loading for each factor. \deqn{ \phi = \frac{\sum (F_1-a) (F_2 - b)}{\sqrt{\sum((F_1-a)^2)\sum((F_2-b)^2)}} .}{phi = sum((F1-a)(F2-b))/sqrt(sum((F1-a)^2) sum((F2-b)^2)) }. For congruence coefficients, a = b= 0. For correlations a=mean F1, b= mean F2. Input may either be matrices or factor analysis or principal components analyis output (which includes a loadings object), or a mixture of the two. To compare more than two solutions, x may be a list of matrices, all of which will be compared. Normally, all factor loading matrices should be complete (have no missing loadings). In the case where some loadings are missing, if the use option is specified, then variables with missing loadings are dropped. If the data are zero centered, this is the correlation, if the data are centered around the scale midpoint (M), this is Cohen's Similarity coefficient. See examples. If M is not specified, it is found as the midpoint of the items in x and y. \code{\link{cohen.profile}} applies the \code{\link{congruence}} function to data centered around M. M may be specified, or found from the data. The last example is taken from Cohen (1969). \code{\link{distance}} finds the generalized distance as a function of r. City block (r=1), Euclidean (r=2) or weighted towards maximimum (r >2). } \value{A matrix of factor congruences or distances. } \references{ Burt, Cyril (1948) Methods of factor-analysis with and without successive approximation. British Journal of Educational Psychology, 7 (2) 172-195. Burt, Cyril (1948) The factorial study of temperamental traits. British Journal of Statistical Psychology, 1(3) 178-203. Cohen, Jacob (1969), rc: A profile similarity coefficient invariant over variable reflection. Psychological Bulletin, 71 (4) 281-284. Lorenzo-Seva, U. and ten Berge, J. M. F. (2006). Tucker's congruence coefficient as a meaningful index of factor similarity. Methodology: European Journal of Research Methods for the Behavioral and Social Sciences, 2(2):57-64. Gorsuch, Richard, (1983) Factor Analysis. Lawrence Erlebaum Associates. Revelle, W. (In preparation) An Introduction to Psychometric Theory with applications in R (\url{https://personality-project.org/r/book/}) } \author{ \email{revelle@northwestern.edu} \cr \url{https://personality-project.org/revelle.html}} \seealso{ \code{\link{principal}}, \code{\link{fa}}. \code{\link{faCor}} will find factor correlations as well as congruences. } \examples{ #factor congruence of factors and components, both rotated #fa <- fa(Harman74.cor$cov,4) #pc <- principal(Harman74.cor$cov,4) #factor.congruence(fa,pc) # RC1 RC3 RC2 RC4 #MR1 0.98 0.41 0.28 0.32 #MR3 0.35 0.96 0.41 0.31 #MR2 0.23 0.16 0.95 0.28 #MR4 0.28 0.38 0.36 0.98 #factor congruence without rotation #fa <- fa(Harman74.cor$cov,4,rotate="none") #pc <- principal(Harman74.cor$cov,4,rotate="none") #factor.congruence(fa,pc) #just show the beween method congruences # PC1 PC2 PC3 PC4 #MR1 1.00 -0.04 -0.06 -0.01 #MR2 0.15 0.97 -0.01 -0.15 #MR3 0.31 0.05 0.94 0.11 #MR4 0.07 0.21 -0.12 0.96 #factor.congruence(list(fa,pc)) #this shows the within method congruence as well # MR1 MR2 MR3 MR4 PC1 PC2 PC3 PC4 #MR1 1.00 0.11 0.25 0.06 1.00 -0.04 -0.06 -0.01 #MR2 0.11 1.00 0.06 0.07 0.15 0.97 -0.01 -0.15 #MR3 0.25 0.06 1.00 0.01 0.31 0.05 0.94 0.11 #MR4 0.06 0.07 0.01 1.00 0.07 0.21 -0.12 0.96 #PC1 1.00 0.15 0.31 0.07 1.00 0.00 0.00 0.00 #PC2 -0.04 0.97 0.05 0.21 0.00 1.00 0.00 0.00 #PC3 -0.06 -0.01 0.94 -0.12 0.00 0.00 1.00 0.00 #PC4 -0.01 -0.15 0.11 0.96 0.00 0.00 0.00 1.00 #pa <- fa(Harman74.cor$cov,4,fm="pa") # factor.congruence(fa,pa) # PA1 PA3 PA2 PA4 #Factor1 1.00 0.61 0.46 0.55 #Factor2 0.61 1.00 0.50 0.60 #Factor3 0.46 0.50 1.00 0.57 #Factor4 0.56 0.62 0.58 1.00 #compare with #round(cor(fa$loading,pc$loading),2) # RC1 RC3 RC2 RC4 #MR1 0.99 -0.18 -0.33 -0.34 #MR3 -0.33 0.96 -0.16 -0.43 #MR2 -0.29 -0.46 0.98 -0.21 #MR4 -0.44 -0.30 -0.22 0.98 } \keyword{ multivariate }% at least one, from doc/KEYWORDS \keyword{ models }% __ONLY ONE__ keyword per line psych/man/KMO.Rd0000644000176200001440000000462314537073020013076 0ustar liggesusers\name{KMO} \alias{KMO} \title{Find the Kaiser, Meyer, Olkin Measure of Sampling Adequacy} \description{Henry Kaiser (1970) introduced an Measure of Sampling Adequacy (MSA) of factor analytic data matrices. Kaiser and Rice (1974) then modified it. This is just a function of the squared elements of the `image' matrix compared to the squares of the original correlations. The overall MSA as well as estimates for each item are found. The index is known as the Kaiser-Meyer-Olkin (KMO) index.} \usage{ KMO(r) } \arguments{ \item{r}{A correlation matrix or a data matrix (correlations will be found)} } \details{Let \eqn{S^2 = diag(R^{-1})^{-1} } and \eqn{Q = SR^{-1}S}. Then Q is said to be the anti-image intercorrelation matrix. Let \eqn{sumr2 = \sum{R^2}} and \eqn{sumq2 = \sum{Q^2}} for all off diagonal elements of R and Q, then \eqn{SMA=sumr2/(sumr2 + sumq2)}. Although originally MSA was 1 - sumq2/sumr2 (Kaiser, 1970), this was modified in Kaiser and Rice, (1974) to be \eqn{SMA=sumr2/(sumr2 + sumq2)}. This is the formula used by Dziuban and Shirkey (1974) and by SPSS. In his delightfully flamboyant style, Kaiser (1975) suggested that KMO > .9 were marvelous, in the .80s, mertitourious, in the .70s, middling, in the .60s, medicore, in the 50s, miserable, and less than .5, unacceptable. An alternative measure of whether the matrix is factorable is the Bartlett test \code{\link{cortest.bartlett}} which tests the degree that the matrix deviates from an identity matrix. } \value{ \itemize{ \item MSA: The overall Measure of Sampling Adequacy \item MSAi: The measure of sampling adequacy for each item \item Image: The Image correlation matrix (Q) } } \references{ H.~F. Kaiser. (1970) A second generation little jiffy. Psychometrika, 35(4):401--415. H.~F. Kaiser and J.~Rice. (1974) Little jiffy, mark iv. Educational and Psychological Measurement, 34(1):111--117. H.F. Kaiser. 1974) An index of factor simplicity. Psychometrika, 39 (1) 31-36. Dziuban, Charles D. and Shirkey, Edwin C. (1974) When is a correlation matrix appropriate for factor analysis? Some decision rules. Psychological Bulletin, 81 (6) 358 - 361. } \author{ William Revelle } \seealso{ See Also as \code{\link{fa}}, \code{\link{cortest.bartlett}}, \code{\link{Harman.political}}. } \examples{ KMO(Thurstone) KMO(Harman.political) #compare to the results in Dziuban and Shirkey (1974) } \keyword{ multivariate } \keyword{ models} psych/man/geometric.mean.Rd0000644000176200001440000000206511243410641015336 0ustar liggesusers\name{geometric.mean} \alias{geometric.mean} \title{ Find the geometric mean of a vector or columns of a data.frame. } \description{The geometric mean is the nth root of n products or e to the mean log of x. Useful for describing non-normal, i.e., geometric distributions. } \usage{ geometric.mean(x,na.rm=TRUE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{ a vector or data.frame} \item{na.rm}{remove NA values before processing} } \details{Useful for teaching how to write functions, also useful for showing the different ways of estimating central tendency. } \value{geometric mean(s) of x or x.df. } \author{ William Revelle} \note{ Not particularly useful if there are elements that are <= 0. } \seealso{ \code{\link{harmonic.mean}}, \code{\link{mean}} } \examples{ x <- seq(1,5) x2 <- x^2 x2[2] <- NA X <- data.frame(x,x2) geometric.mean(x) geometric.mean(x2) geometric.mean(X) geometric.mean(X,na.rm=FALSE) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{ multivariate } psych/man/lowerUpper.Rd0000644000176200001440000000431513464054324014616 0ustar liggesusers\name{lowerUpper} \alias{lowerUpper} \title{Combine two square matrices to have a lower off diagonal for one, upper off diagonal for the other} \description{ When reporting correlation matrices for two samples (e.g., males and females), it is convenient to show them as one matrix, with entries below the diagonal representing one matrix, and entries above the diagonal the other matrix. It is also useful to compare a correlation matrix with the residuals from a fitted (e.g., factor) model. } \usage{ lowerUpper(lower, upper=NULL, diff=FALSE) } \arguments{ \item{lower}{A square matrix} \item{upper}{A square matrix of the same size as the first (if omitted, then the matrix is converted to two symmetric matrices).} \item{diff}{Find the difference between the first and second matrix and put the results in the above the diagonal entries. } } \details{If just one matrix is provided (i.e., upper is missing), it is decomposed into two square matrices, one equal to the lower off diagonal entries, the other to the upper off diagonal entries. In the normal case two symmetric matrices are provided and combined into one non-symmetric matrix with the lower off diagonals representing the lower matrix and the upper off diagonals representing the upper matrix. If diff is true, the upper off diagonal matrix reflects the differences between the two matrices. } \value{Either one matrix or a list of two} \author{William Revelle} \seealso{\code{\link[psychTools]{read.clipboard.lower}}, \code{\link{corPlot}} } \examples{ b1 <- Bechtoldt.1 b2 <- Bechtoldt.2 b12 <- lowerUpper(b1,b2) cor.plot(b12) diff12 <- lowerUpper(b1,b2,diff=TRUE) corPlot(t(diff12),numbers=TRUE,main="Bechtoldt1 and the differences from Bechtoldt2") #Compare r and partial r lower <- lowerCor(sat.act) upper <- partial.r(sat.act) both = lowerUpper(lower,upper) corPlot(both,numbers=TRUE,main="r and partial r for the sat.act data set") #now show the differences both = lowerUpper(lower,upper,diff=TRUE) corPlot(both,numbers=TRUE,main="Differences between r and partial r for the sat.act data set") } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{multivariate }% at least one, from doc/KEYWORDS psych/man/sim.hierarchical.Rd0000644000176200001440000001431514477470664015676 0ustar liggesusers\name{sim.hierarchical} \alias{sim.hierarchical} \alias{make.hierarchical} \alias{sim.bonds} \title{Create a population or sample correlation matrix, perhaps with hierarchical structure. } \description{Create a population orthogonal or hierarchical correlation matrix from a set of factor loadings and factor intercorrelations. Samples of size n may be then be drawn from this population. Return either the sample data, sample correlations, or population correlations. This is used to create sample data sets for instruction and demonstration. } \usage{ sim.hierarchical(gload=NULL, fload=NULL, n = 0, raw = TRUE,mu = NULL, categorical=FALSE, low=-3,high=3,threshold=NULL) sim.bonds(nvar=9,nf=NULL, loads=c(0,0,.5,.6),validity=.8) make.hierarchical(gload=NULL, fload=NULL, n = 0, raw = FALSE) #deprecated } %- maybe also 'usage' for other objects documented here. \arguments{ \item{gload}{ Loadings of group factors on a general factor. Defaults to c(.9,.8,.7) } \item{fload}{ Loadings of items on the group factors. Defaults to matrix(c(.8,.7,.6,rep(0,9),.7,.6,.5,rep(0,9),.6,.5,.4), ncol=3)} \item{n}{ Number of subjects to generate: N=0 => population values } \item{raw}{ raw=TRUE, report the raw data, raw=FALSE, report the sample correlation matrix. } \item{mu}{means for the individual variables} \item{low}{lower cutoff for categorical data} \item{categorical}{If True, then create categorical data} \item{threshold}{If categorical is TRUE, and binary output is desired, what is the threshold to convert continuous scores into 0/1. May be a vector.} \item{high}{Upper cuttoff for categorical data} \item{nvar}{Number of variables to simulate} \item{loads}{A vector of loadings that will be sampled (rowwise) to define the factors} \item{validity}{The factor loadings of `pure' measures of the factor.} \item{nf}{Number of factors to generate in sim.bonds} } \details{Many personality and cognitive tests have a hierarchical factor structure. For demonstration purposes, it is useful to be able to create such matrices, either with population values, or sample values. Given a matrix of item factor loadings (fload) and of loadings of these factors on a general factor (gload), we create a population correlation matrix by using the general factor law \eqn{ R \approx F' \theta F + U^2} where \eqn{\theta = g'g} The default is to return population correlation matrices. Sample correlation matrices are generated if n > 0. Raw data are returned if raw = TRUE. The default values for gload and fload create a data matrix discussed by Jensen and Weng, 1994. In order to properly simulate polytomous items, the categorical option will round continuous scores into integers. To simulate dichotomous items, the threshold vector may be used to specify the value at which the continuous values are cut into 0/1. If the length of threshold is less than the number of variables, the vector will be sampled (with replacement) to fill it out. Although written to create hierarchical structures, if the gload matrix is all 0, then a non-hierarchical structure will be generated. Yet another model is that of Godfrey H. Thomson (1916) who suggested that independent bonds could produce the same factor structure as a g factor model. This is simulated in \code{\link{sim.bonds}}. Compare the \code{\link{omega}} solutions for a \code{\link{sim.hierarchical}} with a \code{\link{sim.bonds}} model. Both produce reasonable values of omega, although the one was generated without a general factor. } \value{ \item{r}{a matrix of correlations} \item{model}{The population correlation matrix} \item{observed}{The simulated data matrix with the defined structure} \item{theta}{The latent factor scores used to generate the data. Compare how these correlate with the observed data with the results from \code{\link{omega}}.} \item{sl}{The Schmid Leiman transformed factor loadings. These may be used to test factor scoring problem. } } \references{ \url{https://personality-project.org/r/r.omega.html } \cr Jensen, A.R., Weng, L.J. (1994) What is a Good g? Intelligence, 18, 231-258. Godfrey H. Thomson (1916) A hierarchy without a general factor, British Journal of Psychology, 8, 271-281. } \author{ William Revelle } \seealso{ \code{\link{omega}}, \code{\link{schmid}}, \code{\link{ICLUST}}, \code{\link{VSS}} for ways of analyzing these data. Also see \code{\link{sim.structure}} to simulate a variety of structural models (e.g., multiple correlated factor models). } \examples{ gload <- gload<-matrix(c(.9,.8,.7),nrow=3) # a higher order factor matrix fload <-matrix(c( #a lower order (oblique) factor matrix .8,0,0, .7,0,.0, .6,0,.0, 0,.7,.0, 0,.6,.0, 0,.5,0, 0,0,.6, 0,0,.5, 0,0,.4), ncol=3,byrow=TRUE) jensen <- sim.hierarchical(gload,fload) #the test set used by omega round(jensen,2) set.seed(42) #for reproducible results jensen <- sim.hierarchical(n=10000,categorical =TRUE, threshold =c(-1,0,1)) #use the same gload and fload values, but produce the data #items have three levels of difficulty #Compare factor scores using the sl model with those that generated the data lowerCor(jensen$theta) #the correlations of the factors fs <- factor.scores(jensen$observed, jensen$sl) #find factor scores from the data lowerCor(fs$scores) #these are now correlated cor2(fs$scores,jensen$theta) #correlation with the generating factors #compare this to a simulation of the bonds model set.seed(42) R <- sim.bonds() R$R #simulate a non-hierarchical structure fload <- matrix(c(c(c(.9,.8,.7,.6),rep(0,20)),c(c(.9,.8,.7,.6),rep(0,20)), c(c(.9,.8,.7,.6),rep(0,20)),c(c(c(.9,.8,.7,.6),rep(0,20)),c(.9,.8,.7,.6))),ncol=5) gload <- matrix(rep(0,5)) five.factor <- sim.hierarchical(gload,fload,500,TRUE) #create sample data set #do it again with a hierachical structure gload <- matrix(rep(.7,5) ) five.factor.g <- sim.hierarchical(gload,fload,500,TRUE) #create sample data set #compare these two with omega #not run #om.5 <- omega(five.factor$observed,5) #om.5g <- omega(five.factor.g$observed,5) } \keyword{ multivariate}% at least one, from doc/KEYWORDS \keyword{models }% __ONLY ONE__ keyword per line \keyword{datagen} psych/man/parcels.Rd0000644000176200001440000000553613463373304014112 0ustar liggesusers\name{parcels} \alias{parcels} \alias{keysort} \title{Find miniscales (parcels) of size 2 or 3 from a set of items} \description{Given a set of n items, form n/2 or n/3 mini scales or parcels of the most similar pairs or triplets of items. These may be used as the basis for subsequent scale construction or multivariate (e.g., factor) analysis. } \usage{ parcels(x, size = 3, max = TRUE, flip=TRUE,congruence = FALSE) keysort(keys) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{A matrix/dataframe of items or a correlation/covariance matrix of items} \item{size}{Form parcels of size 2 or size 3} \item{flip}{if flip=TRUE, negative correlations lead to at least one item being negatively scored} \item{max}{Should item correlation/covariance be adjusted for their maximum correlation} \item{congruence}{Should the correlations be converted to congruence coefficients?} \item{keys}{Sort a matrix of keys to reflect item order as much as possible} } \details{Items used in measuring ability or other aspects of personality are typically not very reliable. One suggestion has been to form items into homogeneous item composites (HICs), Factorially Homogeneous Item Dimensions (FHIDs) or mini scales (parcels). Parcelling may be done rationally, factorially, or empirically based upon the structure of the correlation/covariance matrix. \code{link{parcels}} facilitates the finding of parcels by forming a keys matrix suitable for using in \code{\link{score.items}}. These keys represent the n/2 most similar pairs or the n/3 most similar triplets. The algorithm is straightforward: For size = 2, the correlation matrix is searched for the highest correlation. These two items form the first parcel and are dropped from the matrix. The procedure is repeated until there are no more pairs to form. For size=3, the three items with the greatest sum of variances and covariances with each other is found. This triplet is the first parcel. All three items are removed and the procedure then identifies the next most similar triplet. The procedure repeats until n/3 parcels are identified. } \value{ \item{keys}{A matrix of scoring keys to be used to form mini scales (parcels) These will be in order of importance, that is, the first parcel (P1) will reflect the most similar pair or triplet. The keys may also be sorted by average item order by using the keysort function.} } \references{ Cattell, R. B. (1956). Validation and intensification of the sixteen personality factor questionnaire. Journal of Clinical Psychology , 12 (3), 205 -214. } \author{William Revelle} \seealso{ \code{\link{score.items}} to score the parcels or \code{\link{iclust}} for an alternative way of forming item clusters. } \examples{ parcels(Thurstone) keys <- parcels(psychTools::bfi) keys <- keysort(keys) score.items(keys,psychTools::bfi) } \keyword{ multivariate }psych/man/rescale.Rd0000644000176200001440000000234510775754240014077 0ustar liggesusers\name{rescale} \alias{rescale} \title{Function to convert scores to ``conventional " metrics} \description{Psychologists frequently report data in terms of transformed scales such as ``IQ" (mean=100, sd=15, ``SAT/GRE" (mean=500, sd=100), ``ACT" (mean=18, sd=6), ``T-scores" (mean=50, sd=10), or ``Stanines" (mean=5, sd=2). The \code{\link{rescale}} function converts the data to standard scores and then rescales to the specified mean(s) and standard deviation(s). } \usage{ rescale(x, mean = 100, sd = 15,df=TRUE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{A matrix or data frame } \item{mean}{Desired mean of the rescaled scores- may be a vector} \item{sd}{Desired standard deviation of the rescaled scores} \item{df}{if TRUE, returns a data frame, otherwise a matrix} } \value{A data.frame (default) or matrix of rescaled scores. } \author{ William Revelle } \seealso{ See Also \code{\link{scale}} } \examples{ T <- rescale(attitude,50,10) #all put on same scale describe(T) T1 <- rescale(attitude,seq(0,300,50),seq(10,70,10)) #different means and sigmas describe(T1) } \keyword{ multivariate }% at least one, from doc/KEYWORDS \keyword{ models }% __ONLY ONE__ keyword per line \keyword{univar} psych/man/ICLUST.graph.Rd0000644000176200001440000001612613571760343014564 0ustar liggesusers\name{ICLUST.graph} \alias{ICLUST.graph} \alias{iclust.graph} \title{ create control code for ICLUST graphical output} \description{Given a cluster structure determined by \code{\link{ICLUST}}, create dot code to describe the \code{\link{ICLUST}} output. To use the dot code, use either https://www.graphviz.org/ Graphviz or a commercial viewer (e.g., OmniGraffle). This function parallels \code{\link{ICLUST.rgraph}} which uses Rgraphviz. } \usage{ ICLUST.graph(ic.results, out.file,min.size=1, short = FALSE,labels=NULL, size = c(8, 6), node.font = c("Helvetica", 14), edge.font = c("Helvetica", 12), rank.direction=c("RL","TB","LR","BT"), digits = 2, title = "ICLUST", ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{ic.results}{output list from ICLUST } \item{out.file}{ name of output file (defaults to console) } \item{min.size}{draw a smaller node (without all the information) for clusters < min.size -- useful for large problems} \item{short}{if short==TRUE, don't use variable names} \item{labels}{vector of text labels (contents) for the variables} \item{size}{size of output } \item{node.font}{ Font to use for nodes in the graph } \item{edge.font}{ Font to use for the labels of the arrows (edges)} \item{rank.direction}{LR or RL } \item{digits}{ number of digits to show } \item{title}{ any title } \item{\dots}{ other options to pass } } \details{ Will create (or overwrite) an output file and print out the dot code to show a cluster structure. This dot file may be imported directly into a dot viewer (e.g., https://www.graphviz.org/). The "dot" language is a powerful graphic description language that is particulary appropriate for viewing cluster output. Commercial graphics programs (e.g., OmniGraffle) can also read (and clean up) dot files. ICLUST.graph takes the output from \code{\link{ICLUST}} results and processes it to provide a pretty picture of the results. Original variables shown as rectangles and ordered on the left hand side (if rank direction is RL) of the graph. Clusters are drawn as ellipses and include the alpha, beta, and size of the cluster. Edges show the cluster intercorrelations. It is possible to trim the output to not show all cluster information. Clusters < min.size are shown as small ovals without alpha, beta, and size information. Although it would be nice to process the dot code directly in R, the Rgraphviz package is difficult to use on all platforms and thus the dot code is written directly. } \value{ Output is a set of dot commands written either to console or to the output file. These commands may then be used as input to any "dot" viewer, e.g., Graphviz. } \references{ ICLUST: \url{https://personality-project.org/r/r.ICLUST.html}} \author{ \email{revelle@northwestern.edu } \cr \url{https://personality-project.org/revelle.html}} \seealso{ \code{\link{VSS.plot}}, \code{\link{ICLUST}}} \examples{ \dontrun{ test.data <- Harman74.cor$cov ic.out <- ICLUST(test.data) #out.file <- file.choose(new=TRUE) #create a new file to write the plot commands to #ICLUST.graph(ic.out,out.file) now go to graphviz (outside of R) and open the out.file you created print(ic.out,digits=2) } #test.data <- Harman74.cor$cov #my.iclust <- ICLUST(test.data) #ICLUST.graph(my.iclust) # # #digraph ICLUST { # rankdir=RL; # size="8,8"; # node [fontname="Helvetica" fontsize=14 shape=box, width=2]; # edge [fontname="Helvetica" fontsize=12]; # label = "ICLUST"; # fontsize=20; #V1 [label = VisualPerception]; #V2 [label = Cubes]; #V3 [label = PaperFormBoard]; #V4 [label = Flags]; #V5 [label = GeneralInformation]; #V6 [label = PargraphComprehension]; #V7 [label = SentenceCompletion]; #V8 [label = WordClassification]; #V9 [label = WordMeaning]; #V10 [label = Addition]; #V11 [label = Code]; #V12 [label = CountingDots]; #V13 [label = StraightCurvedCapitals]; #V14 [label = WordRecognition]; #V15 [label = NumberRecognition]; #V16 [label = FigureRecognition]; #V17 [label = ObjectNumber]; #V18 [label = NumberFigure]; #V19 [label = FigureWord]; #V20 [label = Deduction]; #V21 [label = NumericalPuzzles]; #V22 [label = ProblemReasoning]; #V23 [label = SeriesCompletion]; #V24 [label = ArithmeticProblems]; #node [shape=ellipse, width ="1"]; #C1-> V9 [ label = 0.78 ]; #C1-> V5 [ label = 0.78 ]; #C2-> V12 [ label = 0.66 ]; #C2-> V10 [ label = 0.66 ]; #C3-> V18 [ label = 0.53 ]; #C3-> V17 [ label = 0.53 ]; #C4-> V23 [ label = 0.59 ]; #C4-> V20 [ label = 0.59 ]; #C5-> V13 [ label = 0.61 ]; #C5-> V11 [ label = 0.61 ]; #C6-> V7 [ label = 0.78 ]; #C6-> V6 [ label = 0.78 ]; #C7-> V4 [ label = 0.55 ]; #C7-> V1 [ label = 0.55 ]; #C8-> V16 [ label = 0.5 ]; #C8-> V14 [ label = 0.49 ]; #C9-> C1 [ label = 0.86 ]; #C9-> C6 [ label = 0.86 ]; #C10-> C4 [ label = 0.71 ]; #C10-> V22 [ label = 0.62 ]; #C11-> V21 [ label = 0.56 ]; #C11-> V24 [ label = 0.58 ]; #C12-> C10 [ label = 0.76 ]; #C12-> C11 [ label = 0.67 ]; #C13-> C8 [ label = 0.61 ]; #C13-> V15 [ label = 0.49 ]; #C14-> C2 [ label = 0.74 ]; #C14-> C5 [ label = 0.72 ]; #C15-> V3 [ label = 0.48 ]; #C15-> C7 [ label = 0.65 ]; #C16-> V19 [ label = 0.48 ]; #C16-> C3 [ label = 0.64 ]; #C17-> V8 [ label = 0.62 ]; #C17-> C12 [ label = 0.8 ]; #C18-> C17 [ label = 0.82 ]; #C18-> C15 [ label = 0.68 ]; #C19-> C16 [ label = 0.66 ]; #C19-> C13 [ label = 0.65 ]; #C20-> C19 [ label = 0.72 ]; #C20-> C18 [ label = 0.83 ]; #C21-> C20 [ label = 0.87 ]; #C21-> C9 [ label = 0.76 ]; #C22-> 0 [ label = 0 ]; #C22-> 0 [ label = 0 ]; #C23-> 0 [ label = 0 ]; #C23-> 0 [ label = 0 ]; #C1 [label = "C1\n alpha= 0.84\n beta= 0.84\nN= 2"] ; #C2 [label = "C2\n alpha= 0.74\n beta= 0.74\nN= 2"] ; #C3 [label = "C3\n alpha= 0.62\n beta= 0.62\nN= 2"] ; #C4 [label = "C4\n alpha= 0.67\n beta= 0.67\nN= 2"] ; #C5 [label = "C5\n alpha= 0.7\n beta= 0.7\nN= 2"] ; #C6 [label = "C6\n alpha= 0.84\n beta= 0.84\nN= 2"] ; #C7 [label = "C7\n alpha= 0.64\n beta= 0.64\nN= 2"] ; #C8 [label = "C8\n alpha= 0.58\n beta= 0.58\nN= 2"] ; #C9 [label = "C9\n alpha= 0.9\n beta= 0.87\nN= 4"] ; #C10 [label = "C10\n alpha= 0.74\n beta= 0.71\nN= 3"] ; #C11 [label = "C11\n alpha= 0.62\n beta= 0.62\nN= 2"] ; #C12 [label = "C12\n alpha= 0.79\n beta= 0.74\nN= 5"] ; #C13 [label = "C13\n alpha= 0.64\n beta= 0.59\nN= 3"] ; #C14 [label = "C14\n alpha= 0.79\n beta= 0.74\nN= 4"] ; #C15 [label = "C15\n alpha= 0.66\n beta= 0.58\nN= 3"] ; #C16 [label = "C16\n alpha= 0.65\n beta= 0.57\nN= 3"] ; #C17 [label = "C17\n alpha= 0.81\n beta= 0.71\nN= 6"] ; #C18 [label = "C18\n alpha= 0.84\n beta= 0.75\nN= 9"] ; #C19 [label = "C19\n alpha= 0.74\n beta= 0.65\nN= 6"] ; #C20 [label = "C20\n alpha= 0.87\n beta= 0.74\nN= 15"] ; #C21 [label = "C21\n alpha= 0.9\n beta= 0.77\nN= 19"] ; #C22 [label = "C22\n alpha= 0\n beta= 0\nN= 0"] ; #C23 [label = "C23\n alpha= 0\n beta= 0\nN= 0"] ; #{ rank=same; #V1;V2;V3;V4;V5;V6;V7;V8;V9;V10;V11;V12;V13;V14;V15;V16;V17;V18;V19;V20;V21;V22;V23;V24;}} # #copy the above output to Graphviz and draw it #see \url{https://personality-project.org/r/r.ICLUST.html} for an example. } \keyword{multivariate }% \keyword{cluster}% \keyword{hplot} psych/man/matrix.addition.Rd0000644000176200001440000000250011222300457015531 0ustar liggesusers\name{matrix.addition} \alias{matrix.addition} \alias{\%+\%} \title{A function to add two vectors or matrices } \description{It is sometimes convenient to add two vectors or matrices in an operation analogous to matrix multiplication. For matrices nXm and mYp, the matrix sum of the i,jth element of nSp = sum(over m) of iXm + mYj. } \usage{ x \%+\% y } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{ a n by m matrix (or vector if m= 1)} \item{y}{ a m by p matrix (or vector if m = 1)} } \details{Used in such problems as Thurstonian scaling. Although not technically matrix addition, as pointed out by Krus, there are many applications where the sum or difference of two vectors or matrices is a useful operation. An alternative operation for vectors is outer(x ,y , FUN="+") but this does not work for matrices. } \value{a n by p matix of sums } \references{Krus, D. J. (2001) Matrix addition. Journal of Visual Statistics, 1, (February, 2001).} \author{William Revelle} \examples{ x <- seq(1,4) z <- x \%+\% -t(x) x z #compare with outer(x,-x,FUN="+") x <- matrix(seq(1,6),ncol=2) y <- matrix(seq(1,10),nrow=2) z <- x \%+\% y x y z #but compare this with outer(x ,y,FUN="+") } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{multivariate } psych/man/densityBy.Rd0000644000176200001440000001121714442474756014437 0ustar liggesusers\name{densityBy} \alias{densityBy} \alias{violinBy} \alias{violin} \title{Create a 'violin plot' or density plot of the distribution of a set of variables} \description{Among the many ways to describe a data set, one is a density plot for each value of a grouping variable and another is violin plot of multiple variables. A density plot shows the density for different groups to show effect sizes. A violin plot is similar to a box plot but shows the actual distribution. Median and 25th and 75th percentile lines are added to the display. If a grouping variable is specified, violinBy will draw violin plots for each variable and for each group. Data points may be drawn as well in what is known as a "raincloud plot". } \usage{ violin(x,data=NULL, var=NULL, grp=NULL, grp.name=NULL, xlab=NULL, ylab=NULL, main="Density plot", vertical=TRUE, dots=FALSE, rain=FALSE, jitter=.05, alpha=1, errors=FALSE, eyes=TRUE, adjust=1, restrict=TRUE, xlim=NULL, add=FALSE, col=NULL, pch=20, scale=NULL,...) violinBy(x, var=NULL, grp=NULL, data=NULL, grp.name=NULL, xlab=NULL, ylab=NULL, main="Density plot", vertical=TRUE, dots=FALSE, rain=FALSE, jitter=.05, alpha= 1, errors=FALSE, eyes=TRUE, adjust=1, restrict=TRUE, xlim=NULL, add=FALSE, col=NULL, pch=20, scale=NULL,...) densityBy(x, var=NULL, grp=NULL,data=NULL, freq=FALSE, col=c("blue","red","black"), alpha=.5, adjust=1, ylim=NULL, xlim=NULL, xlab="Variable", ylab="Density", main="Density Plot",legend=NULL) } \arguments{ \item{x}{A matrix or data.frame (can be expressed in formula input)} \item{var}{The variable(s) to display} \item{grp}{The grouping variable(s) } \item{data}{The name of the data object if using formula input} \item{grp.name}{If the grouping variable is specified, then what names should be give to the group? Defaults to 1:ngrp} \item{ylab}{The y label} \item{xlab}{The x label} \item{main}{Figure title} \item{vertical}{If TRUE, plot the violins vertically, otherwise, horizontonally} \item{dots}{if TRUE, add a stripchart with the data points} \item{rain}{If TRUE, draw a half violin with rain drops} \item{jitter}{If doing a stripchart, then jitter the points this much} \item{errors}{If TRUE, add error bars or cats eyes to the violins} \item{eyes}{if TRUE and errors=TRUE, then draw cats eyes} \item{alpha}{A degree of transparency (0=transparent ... 1 not transparent)} \item{adjust}{Allows smoothing of density histograms when plotting variables like height} \item{freq}{if TRUE, then plot frequencies (n * density)} \item{restrict}{Restrict the density to the observed max and min of the data} \item{xlim}{if not specified, will be .5 beyond the number of variables} \item{ylim}{If not specified, determined by the data} \item{add}{Allows overplotting} \item{col}{Allows for specification of colours. The default for 2 groups is blue and red, for more group levels, rainbows.} \item{pch}{The plot character for the mean is by default a small filled circle. To not show the mean, use pch=NA } \item{scale}{If NULL, scale the widths by the square root of sample size, otherwise scale by the value supplied.} \item{legend}{If not NULL, draw a legend at c(topleft,topright,top,left,right) } \item{\dots}{Other graphic parameters} } \details{ Describe the data using a violin plot. Change alpha to modify the shading. The grp variable may be used to draw separate violin plots for each of multiple groups. For relatively smallish data sets (< 500-1000), it is informative to also show the actual data points. This done with the dots=TRUE option. The jitter value is arbitrarily set to .05, but making it larger (say .1 or .2) will display more points. Perhaps even prettier, is draw "raincloud" plots (half violins with rain drops) } \value{ The density (y axis) by value (x axis) of the data (for densityBy) or a violin plot for each variable (perhaps broken down by groups) } \author{William Revelle } \note{Formula input added July 12, 2020} \seealso{ \code{\link{describe}}, \code{\link{describeBy}} and \code{\link{statsBy}} for descriptive statistics and \code{\link{error.bars}}, \code{\link{error.bars.by}} and \code{\link{bi.bars}}, \code{\link{histBy}} and \code{\link{scatterHist}} for other graphic displays } \examples{ violin(psychTools::bfi[4:8]) violin(SATV + SATQ ~ gender, data=sat.act, grp.name =cs(MV,FV,MQ,FQ)) #formula input violinBy(psychTools::bfi,var=4:7,grp ="gender",grp.name=c("M","F")) #rain does not work for multiple DVS, just up to 2 IVs violinBy(SATV ~ education,data =sat.act, rain=TRUE, pch=".", vertical=FALSE) #rain densityBy(SATV ~ gender,data =sat.act,legend=1) #formula input } \keyword{ multivariate } \keyword{ hplot } psych/man/factor.fit.Rd0000644000176200001440000000322412244450165014505 0ustar liggesusers\name{factor.fit} \alias{factor.fit} \title{ How well does the factor model fit a correlation matrix. Part of the VSS package } \description{The basic factor or principal components model is that a correlation or covariance matrix may be reproduced by the product of a factor loading matrix times its transpose: F'F or P'P. One simple index of fit is the 1 - sum squared residuals/sum squared original correlations. This fit index is used by \code{\link{VSS}}, \code{\link{ICLUST}}, etc. } \usage{ factor.fit(r, f) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{r}{a correlation matrix } \item{f}{A factor matrix of loadings.} } \details{There are probably as many fit indices as there are psychometricians. This fit is a plausible estimate of the amount of reduction in a correlation matrix given a factor model. Note that it is sensitive to the size of the original correlations. That is, if the residuals are small but the original correlations are small, that is a bad fit. Let \deqn{R* = R - FF'}{R*= R - FF'} \deqn{fit = 1 - \frac{ \sum(R*^2)}{\sum(R^2)}}{fit = 1 - sum(R*^2)/sum(R^2)}. The sums are taken for the off diagonal elements.} \value{fit } \author{ William Revelle} \seealso{ \code{\link{VSS}}, \code{\link{ICLUST}} } \examples{ \dontrun{ #compare the fit of 4 to 3 factors for the Harman 24 variables fa4 <- factanal(x,4,covmat=Harman74.cor$cov) round(factor.fit(Harman74.cor$cov,fa4$loading),2) #[1] 0.9 fa3 <- factanal(x,3,covmat=Harman74.cor$cov) round(factor.fit(Harman74.cor$cov,fa3$loading),2) #[1] 0.88 } } \keyword{ models }% at least one, from doc/KEYWORDS \keyword{ models }% __ONLY ONE__ keyword per line psych/man/ICLUST.sort.Rd0000644000176200001440000000406213256544644014452 0ustar liggesusers\name{ICLUST.sort} \alias{ICLUST.sort} \alias{iclust.sort} \title{Sort items by absolute size of cluster loadings} \description{Given a cluster analysis or factor analysis loadings matrix, sort the items by the (absolute) size of each column of loadings. Used as part of ICLUST and SAPA analyses. The columns are rearranged by the } \usage{ ICLUST.sort(ic.load, cut = 0, labels = NULL,keys=FALSE,clustsort=TRUE) } \arguments{ \item{ic.load}{ The output from a factor or principal components analysis, or from ICLUST, or a matrix of loadings.} \item{cut}{Do not include items in clusters with absolute loadings less than cut} \item{labels}{labels for each item.} \item{keys}{should cluster keys be returned? Useful if clusters scales are to be scored.} \item{clustsort}{TRUE will will sort the clusters by their eigenvalues} } \details{When interpreting cluster or factor analysis outputs, is is useful to group the items in terms of which items have their biggest loading on each factor/cluster and then to sort the items by size of the absolute factor loading. A stable cluster solution will be one in which the output of these cluster definitions does not vary when clusters are formed from the clusters so defined. With the keys=TRUE option, the resulting cluster keys may be used to score the original data or the correlation matrix to form clusters from the factors. } \value{ \item{sorted }{A data.frame of item numbers, item contents, and item x factor loadings.} \item{cluster}{A matrix of -1, 0, 1s defining each item by the factor/cluster with the row wise largest absolute loading. } ... } \references{ \url{https://personality-project.org/r/r.ICLUST.html} } \author{William Revelle } \note{ Although part of the ICLUST set of programs, this is also more useful for factor or principal components analysis. } \seealso{ \code{\link{ICLUST.graph}},\code{\link{ICLUST.cluster}}, \code{\link{cluster.fit} }, \code{\link{VSS}}, \code{\link{factor2cluster} }} \keyword{ multivariate }% at least one, from doc/KEYWORDS \keyword{ models }% __ONLY ONE__ keyword per line psych/man/correct.cor.Rd0000644000176200001440000000472613256544623014710 0ustar liggesusers\name{correct.cor} \alias{correct.cor} \title{ Find dis-attenuated correlations given correlations and reliabilities } \description{Given a raw correlation matrix and a vector of reliabilities, report the disattenuated correlations above the diagonal. } \usage{ correct.cor(x, y) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{ A raw correlation matrix } \item{y}{ Vector of reliabilities } } \details{Disattenuated correlations may be thought of as correlations between the latent variables measured by a set of observed variables. That is, what would the correlation be between two (unreliable) variables be if both variables were measured perfectly reliably. This function is mainly used if importing correlations and reliabilities from somewhere else. If the raw data are available, use \code{\link{score.items}}, or \code{\link{cluster.loadings}} or \code{\link{cluster.cor}}. Examples of the output of this function are seen in \code{\link{cluster.loadings}} and \code{\link{cluster.cor}} } \value{Raw correlations below the diagonal, reliabilities on the diagonal, disattenuated above the diagonal. } \references{ Revelle, W. (in preparation) An Introduction to Psychometric Theory with applications in R. Springer. at \url{https://personality-project.org/r/book/} } \author{ Maintainer: William Revelle \email{revelle@northwestern.edu} } \seealso{ \code{\link{cluster.loadings}} and \code{\link{cluster.cor}}} \examples{ # attitude from the datasets package #example 1 is a rather clunky way of doing things a1 <- attitude[,c(1:3)] a2 <- attitude[,c(4:7)] x1 <- rowSums(a1) #find the sum of the first 3 attitudes x2 <- rowSums(a2) #find the sum of the last 4 attitudes alpha1 <- alpha(a1) alpha2 <- alpha(a2) x <- matrix(c(x1,x2),ncol=2) x.cor <- cor(x) alpha <- c(alpha1$total$raw_alpha,alpha2$total$raw_alpha) round(correct.cor(x.cor,alpha),2) # #much better - although uses standardized alpha clusters <- matrix(c(rep(1,3),rep(0,7),rep(1,4)),ncol=2) cluster.loadings(clusters,cor(attitude)) # or clusters <- matrix(c(rep(1,3),rep(0,7),rep(1,4)),ncol=2) cluster.cor(clusters,cor(attitude)) # #best keys <- make.keys(attitude,list(first=1:3,second=4:7)) scores <- scoreItems(keys,attitude) scores$corrected #However, to do the more general case of correcting correlations for reliabilty #corrected <- cor2cov(x.cor,1/alpha) #diag(corrected) <- 1 } \keyword{ models }% at least one, from doc/KEYWORDS \keyword{ multivariate }% __ONLY ONE__ keyword per line psych/man/skew.Rd0000644000176200001440000001013614537110505013415 0ustar liggesusers\name{mardia} \alias{mardia} \alias{skew} \alias{kurtosi} \title{ Calculate univariate or multivariate (Mardia's test) skew and kurtosis for a vector, matrix, or data.frame} \description{Find the skew and kurtosis for each variable in a data.frame or matrix. Unlike skew and kurtosis in e1071, this calculates a different skew for each variable or column of a data.frame/matrix. mardia applies Mardia's tests for multivariate skew and kurtosis } \usage{ skew(x, na.rm = TRUE,type=3) kurtosi(x, na.rm = TRUE,type=3) mardia(x,na.rm = TRUE,plot=TRUE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{ A data.frame or matrix } \item{na.rm}{ how to treat missing data } \item{type}{See the discussion in describe}. \item{plot}{Plot the expected normal distribution values versus the Mahalanobis distance of the subjects.} } \details{given a matrix or data.frame x, find the skew or kurtosis for each column (for skew and kurtosis) or the multivariate skew and kurtosis in the case of mardia. As of version 1.2.3,when finding the skew and the kurtosis, there are three different options available. These match the choices available in skewness and kurtosis found in the e1071 package (see Joanes and Gill (1998) for the advantages of each one). If we define \eqn{m_r = [\sum(X- mx)^r]/n}{m_r = [sum(X- mx)^r]/n} then Type 1 finds skewness and kurtosis by \eqn{g_1 = m_3/(m_2)^{3/2} } and \eqn{g_2 = m_4/(m_2)^2 -3}. Type 2 is \eqn{G1 = g1 * \sqrt{n *(n-1)}/(n-2)} and \eqn{G2 = (n-1)*[(n+1)g2 +6]/((n-2)(n-3))}. Type 3 is \eqn{b1 = [(n-1)/n]^{3/2} m_3/m_2^{3/2}} and \eqn{b2 = [(n-1)/n]^{3/2} m_4/m_2^2)}. For consistency with e1071 and with the Joanes and Gill, the types are now defined as above. However, from revision 1.0.93 to 1.2.3, kurtosi by default gives an unbiased estimate of the kurtosis (DeCarlo, 1997). Prior versions used a different equation which produced a biased estimate. (See the kurtosis function in the e1071 package for the distinction between these two formulae. The default, type 1 gave what is called type 2 in e1071. The other is their type 3.) For comparison with previous releases, specifying type = 2 will give the old estimate. These type numbers are now changed. } \value{ \item{skew}{if input is a matrix or data.frame, skew is a vector of skews} \item{kurtosi}{if input is a matrix or data.frame, kurtosi is a vector of kurtosi} \item{bp1}{Mardia's bp1 estimate of multivariate skew} \item{bp2}{Mardia's bp2 estimate of multivariate kurtosis} \item{skew}{Mardia's skew statistic} \item{small.skew}{Mardia's small sample skew statistic} \item{p.skew}{Probability of skew} \item{p.small}{Probability of small.skew} \item{kurtosis}{Mardia's multivariate kurtosis statistic} \item{p.kurtosis}{Probability of kurtosis statistic} \item{D}{Mahalanobis distance of cases from centroid} } \references{ Joanes, D.N. and Gill, C.A (1998). Comparing measures of sample skewness and kurtosis. The Statistician, 47, 183-189. L.DeCarlo. 1997) On the meaning and use of kurtosis, Psychological Methods, 2(3):292-307, K.V. Mardia (1970). Measures of multivariate skewness and kurtosis with applications. Biometrika, 57(3):pp. 519-30, 1970.} \author{William Revelle } \note{The mean function supplies means for the columns of a data.frame, but the overall mean for a matrix. Mean will throw a warning for non-numeric data, but colMeans stops with non-numeric data. Thus, the function uses either mean (for data frames) or colMeans (for matrices). This is true for skew and kurtosi as well. } \note{ Probability values less than 10^-300 are set to 0. } \seealso{\code{\link{describe}}, \code{\link{describe.by}}, mult.norm in QuantPsyc, Kurt in QuantPsyc} \examples{ round(skew(attitude),2) #type 3 (default) round(kurtosi(attitude),2) #type 3 (default) #for the differences between the three types of skew and kurtosis: round(skew(attitude,type=1),2) #type 1 round(skew(attitude,type=2),2) #type 2 mardia(attitude) x <- matrix(rnorm(1000),ncol=10) describe(x) mardia(x) } \keyword{ multivariate }% at least one, from doc/KEYWORDS \keyword{ models }% __ONLY ONE__ keyword per line psych/man/biplot.psych.Rd0000644000176200001440000001426214537106327015075 0ustar liggesusers\name{biplot.psych} \alias{biplot.psych} \title{Draw biplots of factor or component scores by factor or component loadings} \description{Extends the biplot function to the output of \code{\link{fa}}, \code{\link{fa.poly}} or \code{\link{principal}}. Will plot factor scores and factor loadings in the same graph. If the number of factors > 2, then all pairs of factors are plotted. Factor score histograms are plotted on the diagonal. The input is the resulting object from \code{\link{fa}}, \code{\link{principal}}, or \code{\link{fa.poly}} with the scores=TRUE option. Points may be colored according to other criteria. } \usage{ \method{biplot}{psych}(x, labels=NULL,cex=c(.75,1),main="Biplot from fa", hist.col="cyan",xlim.s=c(-3,3),ylim.s=c(-3,3),xlim.f=c(-1,1),ylim.f=c(-1,1), maxpoints=100,adjust=1.2,col,pos, arrow.len = 0.1,pch=16,choose=NULL, cuts=1,cutl=.0,group=NULL,smoother=FALSE,vars=TRUE,...) } \arguments{ \item{x}{The output from \code{\link{fa}}, \code{\link{fa.poly}} or \code{\link{principal}} with the scores=TRUE option} \item{labels}{if NULL, draw the points with the plot character (pch) specified. To identify the data points, specify labels= 1:n where n is the number of observations, or labels =rownames(data) where data was the data set analyzed by the factor analysis.} \item{cex}{A vector of plot sizes of the data labels and of the factor labels} \item{main}{A main title for a two factor biplot} \item{hist.col}{If plotting more than two factors, the color of the histogram of the factor scores} \item{xlim.s}{x limits of the scores. Defaults to plus/minus three sigma} \item{ylim.s}{y limits of the scores.Defaults to plus/minus three sigma} \item{xlim.f}{x limits of the factor loadings.Defaults to plus/minus 1.0} \item{ylim.f}{y limits of the factor loadings.Defaults to plus/minus 1.0} \item{maxpoints}{When plotting 3 (or more) dimensions, at what size should we switch from plotting "o" to plotting "."} \item{adjust}{an adjustment factor in the histogram} \item{col}{a vector of colors for the data points and for the factor loading labels} \item{pos}{If plotting labels, what position should they be in? 1=below, 2=left, 3 top, 4 right. If missing, then the assumption is that labels should be printed instead of data points.} \item{arrow.len}{ the length of the arrow head} \item{pch}{The plotting character to use. pch=16 gives reasonable size dots. pch="." gives tiny points. If adding colors, use pch between 21 and 25. (see examples).} \item{choose}{Plot just the specified factors} \item{cuts}{Do not label cases with abs(factor scores) < cuts) (Actually, the distance of the x and y scores from 0) } \item{cutl}{Do not label variables with communalities in the two space < cutl} \item{group}{A vector of a grouping variable for the scores. Show a different color and symbol for each group.} \item{smoother}{If TRUE then do a smooth scatter plot (which shows the density rather than the data points). Only useful for large data sets.} \item{vars}{If TRUE, draw arrows for the variables, and plot the scores. If FALSE, then draw arrows for the scores and plot the variables.} \item{\dots}{more options for graphics} } \details{ Uses the generic biplot function to take the output of a factor analysis \code{\link{fa}}, \code{\link{fa.poly}} or principal components analysis \code{\link{principal}} and plot the factor/component scores along with the factor/component loadings. This is an extension of the generic biplot function to allow more control over plotting points in a two space and also to plot three or more factors (two at time). This will work for objects produced by \code{\link{fa}}, \code{\link{fa.poly}} or \code{\link{principal}} if they applied to the original data matrix. If however, one has a correlation matrix (e.g., based upon the output from \code{\link{tetrachoric}} or \code{\link{polychoric}}), and has done either \code{\link{fa}} or \code{\link{principal}} on the correlations, then obviously, we can not do a biplot. However, both of those functions produce a weights matrix, which, in combination with the original data can be used to find the scores by using \code{\link{factor.scores}}. Since biplot.psych is looking for two elements of the x object: x$loadings and x$scores, you can create the appropriate object to plot, or add it to the factor object See the third and fourth examples. In order to just plot the loadings, use \code{\link{fa.plot}}. Or, if we want to show the loadings as vectors, use pch = "". } \author{William Revelle} \seealso{\code{\link{fa}}, \code{\link{fa.poly}}, \code{\link{principal}}, \code{\link{fa.plot}}, \code{\link{pairs.panels}} } \examples{ #the standard example data(USArrests) fa2 <- fa(USArrests,2,scores=TRUE) biplot(fa2,labels=rownames(USArrests)) # plot the 3 factor solution #data(bfi) fa3 <- fa(psychTools::bfi[1:200,1:15],3,scores=TRUE) biplot(fa3) #just plot factors 1 and 3 from that solution biplot(fa3,choose=c(1,3)) # fa2 <- fa(psychTools::bfi[16:25],2) #factor analysis fa2$scores <- fa2$scores[1:100,] #just take the first 100 #now plot with different colors and shapes for males and females biplot(fa2,pch=c(24,21)[psychTools::bfi[1:100,"gender"]], group =psychTools::bfi[1:100,"gender"], main="Biplot of Openness and Neuroticism by gender") ## An example from the correlation matrix r <- cor(psychTools::bfi[1:200,1:10], use="pairwise") #find the correlations f2 <- fa(r,2) #biplot(f2) #this throws an error (not run) #f2 does not have scores, but we can find them f2$scores <- factor.scores(psychTools::bfi[1:200,1:10],f2) biplot(f2,main="biplot from correlation matrix and factor scores") #or create a new object with the scores #find the correlations for all subjects r <- cor(psychTools::bfi[1:10], use="pairwise") f2 <- fa(r,2) x <- list() #find the scores for just the first 200 subjects x$scores <- factor.scores(psychTools::bfi[1:200,1:10],f2) x$loadings <- f2$loadings class(x) <- c('psych','fa') biplot(x,main="biplot from correlation matrix combined with factor scores") } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{multivariate } \keyword{hplot }% __ONLY ONE__ keyword per line psych/man/Pinv.Rd0000644000176200001440000000362013534045255013365 0ustar liggesusers\name{Pinv} \alias{Pinv} \title{Compute the Moore-Penrose Pseudo Inverse of a matrix} \description{Given a matrix of less than full rank, the conventional inverse function will fail. The pseudoinverse or generalized inverse resolves this problem by using just the postive values of the singular value decomposition d matrix. An adaptation of the ginv function from MASS and the pinv function from pracma. } \usage{ Pinv(X, tol = sqrt(.Machine$double.eps)) } \arguments{ \item{X}{A correlation or covariance matrix to analyze} \item{tol}{A very small number. Reject values with eigen values less than tolerance} } \details{ The singular value decomposition of a matrix X is UdV where for full rank matrices, d is the vector of eigen values and U and V are the matrices of eigen vectors. The inverse is just U/d. If the matrix is less than full rank, many of the d values are effectively zero (at the limit of computational accuracy.) Thus, to solve matrix equations with matrices of less than full rank (e.g. the \code{\link{schmid}} Schmid-Leiman solution), we need to find the generalized inverse. } \value{The generalized inverse} \references{ Venables, W. N. and Ripley, B. D. (1999) Modern Applied Statistics with S-PLUS. Third Edition. Springer. p.100. } \author{ William Revelle } \note{Adapted from the ginv function in MASS and the pinv function in pracma. Installed here to avoid loading those packages. } \seealso{ \code{\link{schmid}}, \code{\link{faCor}} } \examples{ round(Pinv(Thurstone) \%*\% Thurstone,2) #an identity matrix sl <- schmid(Thurstone,3) #The schmid-leiman solution is less than full rank F <- sl$sl[,1:4] #the SL solution is general + 3 gropus R <- Thurstone # diag(R) <- sl$sl[,5] #the reproduced matrix (R - U2) S <- t(Pinv(t(F) \%*\% F) \%*\% t(F) \%*\% R) #the structure matrix Phi <- t(S) \%*\% F \%*\% Pinv(t(F) \%*\% F) #the factor covariances } \keyword{multivariate}psych/man/tetrachor.Rd0000644000176200001440000004444114362051373014450 0ustar liggesusers\name{tetrachoric} \alias{tetrachoric} \alias{tetrachor} \alias{polychoric} \alias{biserial} \alias{polydi} \alias{polyserial} \alias{poly.mat} \title{Tetrachoric, polychoric, biserial and polyserial correlations from various types of input} \description{The tetrachoric correlation is the inferred Pearson Correlation from a two x two table with the assumption of bivariate normality. The polychoric correlation generalizes this to the n x m table. Particularly important when doing Item Response Theory or converting comorbidity statistics using normal theory to correlations. Input may be a 2 x 2 table of cell frequencies, a vector of cell frequencies, or a data.frame or matrix of dichotomous data (for tetrachoric) or of numeric data (for polychoric). The biserial correlation is between a continuous y variable and a dichotmous x variable, which is assumed to have resulted from a dichotomized normal variable. Biserial is a special case of the polyserial correlation, which is the inferred latent correlation between a continuous variable (X) and a ordered categorical variable (e.g., an item response). Input for these later two are data frames or matrices. Requires the mnormt package. } \usage{ tetrachoric(x,y=NULL,correct=.5,smooth=TRUE,global=TRUE,weight=NULL,na.rm=TRUE, delete=TRUE) polychoric(x,y=NULL,smooth=TRUE,global=TRUE,polycor=FALSE,ML=FALSE, std.err=FALSE, weight=NULL,correct=.5,progress=TRUE,na.rm=TRUE, delete=TRUE,max.cat=8) biserial(x,y) polyserial(x,y) polydi(p,d,taup,taud,global=TRUE,ML = FALSE, std.err = FALSE, weight=NULL,progress=TRUE,na.rm=TRUE,delete=TRUE,correct=.5) #deprecated use polychoric instead poly.mat(x, short = TRUE, std.err = FALSE, ML = FALSE) } \arguments{ \item{x}{The input may be in one of four forms: a) a data frame or matrix of dichotmous data (e.g., the lsat6 from the bock data set) or discrete numerical (i.e., not too many levels, e.g., the big 5 data set, bfi) for polychoric, or continuous for the case of biserial and polyserial. b) a 2 x 2 table of cell counts or cell frequencies (for tetrachoric) or an n x m table of cell counts (for both tetrachoric and polychoric). c) a vector with elements corresponding to the four cell frequencies (for tetrachoric) d) a vector with elements of the two marginal frequencies (row and column) and the comorbidity (for tetrachoric) } \item{y}{A (matrix or dataframe) of discrete scores. In the case of tetrachoric, these should be dichotomous, for polychoric not too many levels, for biserial they should be discrete (e.g., item responses) with not too many (<10?) categories.} \item{correct}{Correction value to use to correct for continuity in the case of zero entry cell for tetrachoric, polychoric, polybi, and mixed.cor. See the examples for the effect of correcting versus not correcting for continuity.} \item{smooth}{if TRUE and if the tetrachoric/polychoric matrix is not positive definite, then apply a simple smoothing algorithm using cor.smooth} \item{global}{When finding pairwise correlations, should we use the global values of the tau parameter (which is somewhat faster), or the local values (global=FALSE)? The local option is equivalent to the polycor solution, or to doing one correlation at a time. global=TRUE borrows information for one item pair from the other pairs using those item's frequencies. This will make a difference in the presence of lots of missing data. With very small sample sizes with global=FALSE and correct=TRUE, the function will fail (for as yet underdetermined reasons. } \item{polycor}{A no longer used option, kept to stop other packages from breaking.} \item{weight}{A vector of length of the number of observations that specifies the weights to apply to each case. The NULL case is equivalent of weights of 1 for all cases. } \item{short}{ short=TRUE, just show the correlations, short=FALSE give the full hetcor output from John Fox's hetcor function if installed and if doing polychoric Deprecated} \item{std.err}{std.err=FALSE does not report the standard errors (faster) deprecated} \item{progress}{Show the progress bar (if not doing multicores)} \item{ML}{ ML=FALSE do a quick two step procedure, ML=TRUE, do longer maximum likelihood --- very slow! Deprecated} \item{na.rm}{Should missing data be deleted} \item{delete}{Cases with no variance are deleted with a warning before proceeding.} \item{max.cat}{The maximum number of categories to bother with for polychoric. } \item{p}{The polytomous input to polydi} \item{d}{The dichotomous input to polydi} \item{taup}{The tau values for the polytomous variables -- if global=TRUE} \item{taud}{The tau values for the dichotomous variables -- if globabl = TRUE} } \details{ Tetrachoric correlations infer a latent Pearson correlation from a two x two table of frequencies with the assumption of bivariate normality. The estimation procedure is two stage ML. Cell frequencies for each pair of items are found. In the case of tetrachorics, cells with zero counts are replaced with .5 as a correction for continuity (correct=TRUE). The data typically will be a raw data matrix of responses to a questionnaire scored either true/false (tetrachoric) or with a limited number of responses (polychoric). In both cases, the marginal frequencies are converted to normal theory thresholds and the resulting table for each item pair is converted to the (inferred) latent Pearson correlation that would produce the observed cell frequencies with the observed marginals. (See \code{\link{draw.tetra}} and \code{\link{draw.cor}} for illustrations.) This is a computationally intensive function which can be speeded up considerably by using multiple cores and using the parallel package. The number of cores to use when doing polychoric or tetrachoric may be specified using the options command. The greatest step up in speed is going from 1 cores to 2. This is about a 50\% savings. Going to 4 cores seems to have about at 66\% savings, and 8 a 75\% savings. The number of parallel processes defaults to 2 but can be modified by using the \code{\link{options}} command: options("mc.cores"=4) will set the number of cores to 4. \code{\link{tetrachoric}} and \code{\link{polychoric}} can find non-symmetric correlation matrices of set of x variables (coluumns) and y variable (rows). This is useful if extending a solution from a base set of items to a different set. See \code{\link{fa.extension}} for an application of this. The tetrachoric correlation is used in a variety of contexts, one important one being in Item Response Theory (IRT) analyses of test scores, a second in the conversion of comorbity statistics to correlation coefficients. It is in this second context that examples of the sensitivity of the coefficient to the cell frequencies becomes apparent: Consider the test data set from Kirk (1973) who reports the effectiveness of a ML algorithm for the tetrachoric correlation (see examples). Examples include the lsat6 and lsat7 data sets in the \code{\link{bock}} data. The polychoric function forms matrices of polychoric correlations by an local function (polyc) and will also report the tau values for each alternative. Earlier versions used John Fox's polychor function which has now been replaced by the polyc function. For finding one polychoric correlation from a table, see the Olsson example (below). \code{\link{polychoric}} replaces \code{\link{poly.mat}} and is recommended. \code{\link{poly.mat}} was an alternative wrapper to the polycor function. biserial and polyserial correlations are the inferred latent correlations equivalent to the observed point-biserial and point-polyserial correlations (which are themselves just Pearson correlations). The polyserial function is meant to work with matrix or dataframe input and treats missing data by finding the pairwise Pearson r corrected by the overall (all observed cases) probability of response frequency. This is particularly useful for SAPA procedures (\url{https://www.sapa-project.org/}) (Revelle et al. 2010, 2016, 2020) with large amounts of missing data and no complete cases. See also the International Cognitive Ability Resource (\url{https://www.icar-project.org/}) for similar data. Ability tests and personality test matrices will typically have a cleaner structure when using tetrachoric or polychoric correlations than when using the normal Pearson correlation. However, if either alpha or omega is used to find the reliability, this will be an overestimate of the squared correlation of a latent variable with the observed variable. A biserial correlation (not to be confused with the point-biserial correlation which is just a Pearson correlation) is the latent correlation between x and y where y is continuous and x is dichotomous but assumed to represent an (unobserved) continuous normal variable. Let p = probability of x level 1, and q = 1 - p. Let zp = the normal ordinate of the z score associated with p. Then, \eqn{rbi = r s* \sqrt(pq)/zp }. As nicely discussed by MacCallum et al, 2002, if artificially dichotomizing at the mean, the point biserial will be .8 of the biserial (actually .798). Similarly, the phi coefficient (a Pearson on dichotmous data will be 2[arcsin(rho)/pi) of the real value (rho). The 'ad hoc' polyserial correlation, rps is just \eqn{r = r * sqrt(n-1)/n) \sigma y /\sum(zpi) } where zpi are the ordinates of the normal curve at the normal equivalent of the cut point boundaries between the item responses. (Olsson, 1982) All of these were inspired by (and adapted from) John Fox's polycor package which should be used for precise ML estimates of the correlations. See, in particular, the hetcor function in the polychor package. The results from polychoric match the polychor answers to at least 5 decimals when using correct=FALSE, and global = FALSE. Particularly for tetrachoric correlations from sets of data with missing data, the matrix will sometimes not be positive definite. Various smoothing alternatives are possible, the one done here is to do an eigen value decomposition of the correlation matrix, set all negative eigen values to 10 * .Machine$double.eps, normalize the positive eigen values to sum to the number of variables, and then reconstitute the correlation matrix. A warning is issued when this is done. For very small data sets, the correction for continuity for the polychoric correlations can lead to difficulties, particularly if using the global=FALSE option, or if doing just one correlation at a time. Setting a smaller correction value (i.e., correct =.1) seems to help. John Uebersax (2015) makes the interesting point that both polychoric and tetrachoric correlations should be called latent correlations or latent continuous correlations because of the way they are found and not tetrachoric or polychoric which is the way they were found in the past. That is, what is the correlation between two latent variables that when artificially broken into two (tetrachoric) or more (polychoric) values produces the n x n table of observed frequencies. For combinations of continous, categorical, and dichotomous variables, see \code{\link{mixed.cor}}. If using data with a variable number of response alternatives, it is necessary to use the global=FALSE option in polychoric. (This is set automatically if this condition is detected). For relatively small samples with dichotomous data if some cells are empty, or if the resampled matrices are not positive semi-definite, warnings are issued. this leads to serious problems if using multi.cores (the default if using a Mac). The solution seems to be to not use multi.cores (e.g., options(mc.cores =1) \code{\link{mixedCor}} which calls \code{\link{polychoric}} for the N = 4,000 with 145 items of the spi data set, on a Mac book Pro with a 2.4 GHz 8-core Intel i9, 1 core took 130 seconds, 2 cores, 68 seconds, 4 37, 8 22 and 16 22.8. Just finding the polychoric correlations (spi[11:145]) for 4 cores took 34 and for 8 cores, 22 seconds. (Update in 2022: with an M1 max chip) mixedCor for all 145 variables: 1 core = 54, 2 cores 28, 4 core = 15.8 seconds, 8 cores= 8.9 seconds. For just the polychorics 4 cores = 13.7, 8 cores = 7.4 . As the number of categories increases, the computation time does as well. For more than about 8 categories, the difference between a normal Pearson correlation and the polychoric is not very large*. However, the number of categories (num.cat) may be set to large values if desired. (Added for version 2.0.6). *A recent article by Foldnes and Gronneberg suggests using polychoric is appropriate for categorical data even if the number of categories is large. This particularly the case for very non-normal distributions of the category frequencies. } \value{ \item{rho}{The (matrix) of tetrachoric/polychoric/biserial correlations} \item{tau}{The normal equivalent of the cutpoints} \item{fixed}{If any correlations were adjusted for continuity, the total number of adjustments will be reported. } } \references{ A. Gunther and M. Hofler. Different results on tetrachorical correlations in mplus and stata-stata announces modified procedure. Int J Methods Psychiatr Res, 15(3):157-66, 2006. David Kirk (1973) On the numerical approximation of the bivariate normal (tetrachoric) correlation coefficient. Psychometrika, 38, 259-268. Foldnes, N., & Gronneberg, S. (2021, April 1). The Sensitivity of Structural Equation Modeling With Ordinal Data to Underlying Non-Normality and Observed Distributional Forms. Psychological Methods. Advance online publication. http://dx.doi.org/10.1037/met0000385 MacCallum, Robert C. and Zhang, Shaobo and Preacher, Kristopher J. and Rucker, Derek D. (2002) On the practice of dichotomization of quantitative variables., Psychological Methods, 7, (1) 19-40. Olsson, U. Maximum Likelihood Estimation of the Polychoric Correlation Coefficient, Psychometrika, 44:443-460. U.Olsson, F.Drasgow, and N.Dorans (1982). The polyserial correlation coefficient. Psychometrika, 47:337-347. Revelle, W., Wilt, J., and Rosenthal, A. (2010) Individual Differences in Cognition: New Methods for examining the Personality-Cognition Link In Gruszka, A. and Matthews, G. and Szymura, B. (Eds.) Handbook of Individual Differences in Cognition: Attention, Memory and Executive Control, Springer. Revelle, W, Condon, D.M., Wilt, J., French, J.A., Brown, A., and Elleman, L.G. (2016) Web and phone based data collection using planned missing designs. In Fielding, N.G., Lee, R.M. and Blank, G. (Eds). SAGE Handbook of Online Research Methods (2nd Ed), Sage Publcations W. Revelle, E.M. Dworak and D.M. Condon (2020) Exploring the persome: The power of the item in understanding personality structure. Personality and Individual Differences, \doi{10.1016/j.paid.2020.109905} } \author{ William Revelle } \note{For tetrachoric, in the degenerate case of a cell entry with zero observations, a correction for continuity is applied and .5 is added to the cell entry. A warning is issued. If correct=FALSE the correction is not applied. This correction is, by default, on. It can be adjusted by specifying a smaller value. See the examples. For correct=FALSE, the results agree perfectly with John Fox's polycor function. Switched to using sadmvn from the mnormt package to speed up by 50\%. Some small data sets with differing number of alternatives and missing values can lead to errors. This seems to be solved by setting the correct=0 option. } \seealso{\code{\link{mixed.cor}} to find the correlations between mixtures of continuous, polytomous, and dichtomous variables. See the \code{\link{bigCor}} function for very large correlation matrices (uses Pearson correlations). See also the polychor function in the polycor package. \code{\link{irt.fa}} uses the tetrachoric function to do item analysis with the \code{\link{fa}} factor analysis function. \code{\link{draw.tetra}} shows the logic behind a tetrachoric correlation (for teaching purpuses.) } \examples{ #if(require(mnormt)) { data(bock) tetrachoric(lsat6) polychoric(lsat6) #values should be the same tetrachoric(matrix(c(44268,193,14,0),2,2)) #MPLUS reports.24 #Do not apply continuity correction -- compare with previous analysis! tetrachoric(matrix(c(44268,193,14,0),2,2),correct=0) #the default is to add correct=.5 to 0 cells tetrachoric(matrix(c(61661,1610,85,20),2,2)) #Mplus reports .35 tetrachoric(matrix(c(62503,105,768,0),2,2)) #Mplus reports -.10 tetrachoric(matrix(c(24875,265,47,0),2,2)) #Mplus reports 0 polychoric(matrix(c(61661,1610,85,20),2,2)) #Mplus reports .35 polychoric(matrix(c(62503,105,768,0),2,2)) #Mplus reports -.10 polychoric(matrix(c(24875,265,47,0),2,2)) #Mplus reports 0 #Do not apply continuity correction- compare with previous analysis tetrachoric(matrix(c(24875,265,47,0),2,2), correct=0) polychoric(matrix(c(24875,265,47,0),2,2), correct=0) #the same result #examples from Kirk 1973 #note that Kirk's tables have joint probability followed by marginals, but #tetrachoric needs marginals followed by joint probability tetrachoric(c(.5,.5,.333333)) #should be .5 tetrachoric(c(.5,.5,.1150267)) #should be -.75 tetrachoric(c(.5,.5,.397584)) #should e .8 tetrachoric(c(.158655254,.158655254,.145003)) #should be .99 #the example from Olsson, 1979 x <- as.table(matrix(c(13,69,41,6,113,132,0,22,104),3,3)) polychoric(x,correct=FALSE) #Olsson reports rho = .49, tau row = -1.77, -.14 and tau col = -.69, .67 #give a vector of two marginals and the comorbidity tetrachoric(c(.2, .15, .1)) tetrachoric(c(.2, .1001, .1)) #} else { # message("Sorry, you must have mnormt installed")} # 4 plots comparing biserial to point biserial and latent Pearson correlation set.seed(42) x.4 <- sim.congeneric(loads =c(.9,.6,.3,0),N=1000,short=FALSE) y <- x.4$latent[,1] for(i in 1:4) { x <- x.4$observed[,i] r <- round(cor(x,y),1) ylow <- y[x<= 0] yhigh <- y[x > 0] yc <- c(ylow,yhigh) rpb <- round(cor((x>=0),y),2) rbis <- round(biserial(y,(x>=0)),2) ellipses(x,y,ylim=c(-3,3),xlim=c(-4,3),pch=21 - (x>0), main =paste("r = ",r,"rpb = ",rpb,"rbis =",rbis)) dlow <- density(ylow) dhigh <- density(yhigh) points(dlow$y*5-4,dlow$x,typ="l",lty="dashed") lines(dhigh$y*5-4,dhigh$x,typ="l") } #show non-symmeteric results test1 <- tetrachoric(psychTools::ability[,1:4],psychTools::ability[,5:10]) test2 <- polychoric(psychTools::ability[,1:4],psychTools::ability[,5:10]) all <- tetrachoric(psychTools::ability[,1:10]) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{multivariate} psych/man/manhattan.Rd0000644000176200001440000000736413463616526014443 0ustar liggesusers\name{manhattan} \alias{manhattan} \title{"Manhattan" plots of correlations with a set of criteria.} \description{ A useful way of showing the strength of many correlations with a particular criterion is the Manhattan plot. This is just a plot of correlations ordered by some keying variable. Useful to understand the basis of items used in \code{\link{bestScales}}. } \usage{ manhattan(x, criteria = NULL, keys = NULL,raw=TRUE,n.obs=NULL, abs = TRUE, ylab = NULL, labels = NULL, log.p = FALSE,ci=.05, pch = 21, main = "Manhattan Plot of", adjust="holm",ylim = NULL,digits=2,dictionary=NULL, ...) } \arguments{ \item{x}{A matrix or data.frame of items or a correlation matrix.} \item{criteria}{What column names should be predicted. If a separate file, what are the variables to predict.} \item{keys}{a keys.list similar to that used in \code{\link{scoreItems}} } \item{raw}{The default is raw data, the alternative is a correlation matrix} \item{n.obs}{If given a correlation matrix, and showing log.p, we need the number of observations} \item{abs}{Should we show the absolute value of the correlations.} \item{ylab}{If NULL, will label as either correlations or log (10) of correlations} \item{labels}{if NULL, will use the names of the keys} \item{log.p}{Should we show the correlations (log.p = FALSE) or the log of the probabilities of the correlations (TRUE)} \item{ci}{The probability for the upper and lower confidence intervals -- bonferroni adjusted} \item{pch}{The default plot chararcter is a filled circle} \item{main}{The title for each criterion} \item{adjust}{Which adjustment for multiple correlations should be applied ("holm", "bonferroni", "none")} \item{ylim}{If NULL will be the min and max of the data} \item{digits}{Round off the results to digits} \item{dictionary}{A dictionary of items} \item{\dots}{Other graphic parameters} } \details{When exploring the correlations of many items with a few criteria, it is useful to form scales from the most correlated items (see \code{\link{bestScales}}. To get a feeling of the distribution of items across various measures, we can display their correlations (or the log of the probabilities) grouped by some set of scale keys. May also be used to display and order correlations (rows) with a criteria (columns) if given a correlation as input (raw=FALSE). } \value{ The correlations or the log p values are returned (invisibily) } \author{ William Revelle } \seealso{ \code{\link{bestScales}}, \code{\link{error.dots}} } \examples{ op <- par(mfrow=(c(2,3))) #we want to compare two different sets of plots manhattan(psychTools::bfi[1:25],psychTools::bfi[26:28] ,labels=colnames(psychTools::bfi)[1:25], dictionary=psychTools::bfi.dictionary) manhattan(psychTools::bfi[1:25],psychTools::bfi[26:28],log.p=TRUE, dictionary=psychTools::bfi.dictionary) #Do it again, but now show items by the keys.list bfi.keys <- list(agree=c("-A1","A2","A3","A4","A5"),conscientious=c("C1","C2","C3","-C4","-C5"), extraversion=c("-E1","-E2","E3","E4","E5"),neuroticism=c("N1","N2","N3","N4","N5"), openness = c("O1","-O2","O3","O4","-O5")) man <- manhattan(psychTools::bfi[1:25],psychTools::bfi[26:28],keys=bfi.keys, dictionary=psychTools::bfi.dictionary[1:2]) manhattan(psychTools::bfi[1:25],psychTools::bfi[26:28],keys=bfi.keys,log.p=TRUE, dictionary=psychTools::bfi.dictionary[1:2]) #Alternatively, use a matrix as input R <-cor(psychTools::bfi[1:25],psychTools::bfi[26:28],use="pairwise") manhattan(R,cs(gender,education,age),keys=bfi.keys, dictionary=psychTools::bfi.dictionary[1:2], raw=FALSE,abs=FALSE) par <- op psychTools::dfOrder(man,1,ascending=FALSE) #print out the items sorted on gender } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{multivariate } \keyword{hplot }psych/man/error.dots.Rd0000644000176200001440000001300514372530040014540 0ustar liggesusers\name{error.dots} \alias{error.dots} \title{Show a dot.chart with error bars for different groups or variables} \description{Yet one more of the graphical ways of showing data with error bars for different groups. A dot.chart with error bars for different groups or variables is found using from \code{\link{describe}}, \code{\link{describeBy}}, \code{\link{statsBy}}, \code{\link{corCi}}, \code{\link{corr.test}} or data from \code{\link{bestScales}}. } \usage{ error.dots(x=NULL, var = NULL, se = NULL, group = NULL, sd = FALSE, effect=NULL, stats=NULL, head = 12, tail = 12, sort = TRUE, decreasing = TRUE, main = NULL, alpha = 0.05, eyes = FALSE,items=FALSE, min.n = NULL, max.labels = 40, labels = NULL, label.width=NULL, select=NULL, groups = NULL, gdata = NULL, cex = par("cex"), pt.cex = cex, pch = 21, gpch = 21, bg = par("bg"), fg=par("fg"), color = par("fg"), gcolor = par("fg"), lcolor = "gray", xlab = NULL, ylab = NULL, xlim = NULL,add=FALSE,order=NULL, ...) } \arguments{ \item{x}{A data frame or matrix of raw data, or the resulting object from \code{\link{describe}}, \code{\link{describeBy}}, \code{\link{statsBy}}, \code{\link{bestScales}}, \code{\link{corCi}}, \code{\link{corr.test}}, or \code{\link{cohen.d}} } \item{var}{The variable to show (particularly if doing describeBy or StatsBy plots).} \item{se}{Source of a standard error} \item{group}{A grouping variable, if desired. Will group the data on group for one variable (var) } \item{sd}{if FALSE, confidence intervals in terms of standard errors, otherwise draw one standard deviation} \item{effect}{Should the data be compared to a specified group (with mean set to 0) in effect size units?} \item{stats}{A matrix of means and se to use instead of finding them from the data} \item{head}{The number of largest values to report} \item{tail}{The number of smallest values to report} \item{sort}{Sort the groups/variables by value} \item{decreasing}{Should they be sorted in increasing or decreasing order (from top to bottom)} \item{main}{The caption for the figure} \item{alpha}{p value for confidence intervals} \item{eyes}{Draw catseyes for error limits} \item{items}{If showing results from best scales, show the items for a specified dv} \item{min.n}{If using describeBy or statsBy, what should be the minimum sample size to draw} \item{max.labels}{Length of labels (truncate after this value)} \item{labels}{Specify the labels versus find them from the row names} \item{label.width}{Truncate after labels.width} \item{select}{Scale the plot for all the variables, but just show the select variables} \item{groups}{ignored: to be added eventually} \item{gdata}{ignored} \item{cex}{The standard meaning of cex for graphics} \item{pt.cex}{ignored} \item{pch}{Plot character of the mean} \item{gpch}{ignored} \item{bg}{background color (of the dots showing the means)} \item{fg}{foreground color (of the line segments)} \item{color}{Color of the text labels} \item{gcolor}{ignored} \item{lcolor}{ignored until groups are implemented } \item{xlab}{Label the x axis, if NULL, the variable name is used} \item{ylab}{If NULL, then the group rownames are used} \item{xlim}{If NULL, then calculated to show nice values} \item{add}{If TRUE, will add the plot to a previous plot (e.g., from dotchart)} \item{order}{if sort=TRUE, if order is NULL, sort on values, otherwise, if order is returned from a previous figure, use that order. } \item{\dots}{And any other graphic parameters we have forgotten} } \details{ Adapted from the dot.chart function to include error bars and to use the output of \code{\link{describe}}, \code{\link{describeBy}}, \code{\link{statsBy}}, \code{\link{fa}}, \code{\link{bestScales}} or \code{\link{cohen.d}}. To speed up multiple plots, the function can work from the output of a previous run. Thus describeBy will be done and the results can be show for multiple variables. If using the add=TRUE option to add an error.dots plot to a dotplot, note that the order of variables in dot plots goes from last to first (highest y value is actually the last value in a vector.) Also note that the xlim parameter should be set to make sure the plots line up correctly.} \value{ Returns (invisibily) either a describeBy or describe object as well as the order if sorted } \references{Used in particular for showing https://sapa-project.org output.} \author{William Revelle} \seealso{ \code{\link{describe}}, \code{\link{describeBy}}, or \code{\link{statsBy}} as well as \code{\link{error.bars}}, \code{\link{error.bars.by}}, \code{\link{statsBy}}, \code{\link{bestScales}} or \code{\link{cohen.d}} } \examples{ temp <- error.dots(psychTools::bfi[1:25],sort=TRUE, xlab="Mean score for the item, sorted by difficulty") error.dots(psychTools::bfi[1:25],sort=TRUE, order=temp$order, add=TRUE, eyes=TRUE) #over plot with eyes error.dots(psychTools::ability,eyes=TRUE, xlab="Mean score for the item") cd <- cohen.d(psychTools::bfi[1:26],"gender") temp <- error.dots(cd, select=c(1:15,21:25),head=12,tail=13, main="Cohen d and confidence intervals of BFI by gender") error.dots(cd,select=c(16:20),head=13,tail=12,col="blue",add=TRUE,fg="red" ,main="") abline(v=0) #now show cis for correlations R <- corCi(attitude,plot=FALSE) error.dots(R, sort=FALSE) #the asymmetric case R <- corr.test(attitude[,1:2],attitude[,3:7]) error.dots(R, sort=FALSE) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{multivariate} \keyword{ hplot }% __ONLY ONE__ keyword per line psych/man/irt.person.rasch.Rd0000644000176200001440000000550411672536615015664 0ustar liggesusers\name{irt.1p} \alias{irt.0p} \alias{irt.1p} \alias{irt.2p} \alias{irt.person.rasch} \title{Item Response Theory estimate of theta (ability) using a Rasch (like) model} \description{Item Response Theory models individual responses to items by estimating individual ability (theta) and item difficulty (diff) parameters. This is an early and crude attempt to capture this modeling procedure. A better procedure is to use \code{\link{irt.fa}}. } \usage{ irt.person.rasch(diff, items) irt.0p(items) irt.1p(delta,items) irt.2p(delta,beta,items) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{diff}{ A vector of item difficulties --probably taken from irt.item.diff.rasch} \item{items}{A matrix of 0,1 items nrows = number of subjects, ncols = number of items} \item{delta}{delta is the same as diff and is the item difficulty parameter} \item{beta}{beta is the item discrimination parameter found in \code{\link{irt.discrim}} } } \details{A very preliminary IRT estimation procedure. Given scores xij for ith individual on jth item \cr Classical Test Theory ignores item difficulty and defines ability as expected score : abilityi = theta(i) = x(i.) A zero parameter model rescales these mean scores from 0 to 1 to a quasi logistic scale ranging from - 4 to 4 This is merely a non-linear transform of the raw data to reflect a logistic mapping. Basic 1 parameter (Rasch) model considers item difficulties (delta j): p(correct on item j for the ith subject |theta i, deltaj) = 1/(1+exp(deltaj - thetai)) If we have estimates of item difficulty (delta), then we can find theta i by optimization Two parameter model adds item sensitivity (beta j): p(correct on item j for subject i |thetai, deltaj, betaj) = 1/(1+exp(betaj *(deltaj- theta i))) Estimate delta, beta, and theta to maximize fit of model to data. The procedure used here is to first find the item difficulties assuming theta = 0 Then find theta given those deltas Then find beta given delta and theta. This is not an "official" way to do IRT, but is useful for basic item development. See \code{\link{irt.fa}} and \code{\link{score.irt}} for far better options. } \value{ a data.frame with estimated ability (theta) and quality of fit. (for irt.person.rasch) \cr a data.frame with the raw means, theta0, and the number of items completed} \author{ William Revelle} \note{ Not recommended for serious use. This code is under development. Much better functions are in the ltm and eRm packages. Similar analyses can be done using \code{\link{irt.fa}} and \code{\link{score.irt}}. } \seealso{ \code{\link{sim.irt}}, \code{\link{sim.rasch}}, \code{\link{logistic}}, \code{\link{irt.fa}}, \code{\link{tetrachoric}}, \code{\link{irt.item.diff.rasch}} } \keyword{multivariate}% at least one, from doc/KEYWORDS \keyword{models}% __ONLY ONE__ keyword per line psych/man/kaiser.Rd0000644000176200001440000000323313016076552013726 0ustar liggesusers\name{kaiser} \alias{kaiser} \title{Apply the Kaiser normalization when rotating factors} \description{ Kaiser (1958) suggested normalizing factor loadings before rotating them, and then denormalizing them after rotation. The GPArotation package does not (by default) normalize, nor does the \code{\link{fa}} function. Then, to make it more confusing, varimax in stats does,Varimax in GPArotation does not. \code{\link{kaiser}} will take the output of a non-normalized solution and report the normalized solution. } \usage{ kaiser(f, rotate = "oblimin",m=4,pro.m=4) } \arguments{ \item{f}{A factor analysis output from \code{\link{fa}} or a factor loading matrix. } \item{rotate}{Any of the standard rotations avaialable in the GPArotation package. } \item{m}{a parameter to pass to \code{\link{Promax}} } \item{pro.m}{A redundant parameter, which is used to replace m in calls to Promax} } \details{Best results if called from an unrotated solution. Repeated calls using a rotated solution will produce incorrect estimates of the correlations between the factors. } \value{See the values returned by GPArotation functions} \references{ Kaiser, H. F. (1958) The varimax criterion for analytic rotation in factor analysis. Psychometrika 23, 187-200. } \author{ William Revelle } \note{ Prepared in response to a question about why \code{\link{fa}} oblimin results are different from SPSS. } \seealso{ \code{\link{fa}}, \code{\link{Promax}} } \examples{ f3 <- fa(Thurstone,3) f3n <- kaiser(fa(Thurstone,3,rotate="none")) f3p <- kaiser(fa(Thurstone,3,rotate="none"),rotate="Promax",m=3) factor.congruence(list(f3,f3n,f3p)) } \keyword{ multivariate } \keyword{ models}psych/man/p.rep.Rd0000644000176200001440000000766011222300515013466 0ustar liggesusers\name{p.rep} \alias{p.rep} \alias{p.rep.f} \alias{p.rep.t} \alias{p.rep.r} \title{Find the probability of replication for an F, t, or r and estimate effect size } \description{The probability of replication of an experimental or correlational finding as discussed by Peter Killeen (2005) is the probability of finding an effect in the same direction upon an exact replication. For articles submitted to Psychological Science, p.rep needs to be reported. F, t, p and r are all estimates of the size of an effect. But F, t, and p also are also a function of the sample size. Effect size, d prime, may be expressed as differences between means compared to within cell standard deviations, or as a correlation coefficient. These functions convert p, F, and t to d prime and the r equivalent. } \usage{ p.rep(p = 0.05, n=NULL,twotailed = FALSE) p.rep.f(F,df2,twotailed=FALSE) p.rep.r(r,n,twotailed=TRUE) p.rep.t(t,df,df2=NULL,twotailed=TRUE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{p}{conventional probability of statistic (e.g., of F, t, or r)} \item{F}{The F statistic} \item{df}{Degrees of freedom of the t-test, or of the first group if unequal sizes} \item{df2}{Degrees of freedom of the denominator of F or the second group in an unequal sizes t test} \item{r}{Correlation coefficient} \item{n}{Total sample size if using r } \item{t}{t-statistic if doing a t-test or testing significance of a regression slope} \item{twotailed}{Should a one or two tailed test be used? } } \details{The conventional Null Hypothesis Significance Test (NHST) is the likelihood of observing the data given the null hypothesis of no effect. But this tells us nothing about the probability of the null hypothesis. Peter Killeen (2005) introduced the probability of replication as a more useful measure. The probability of replication is the probability that an exact replication study will find a result in the \emph{same direction} as the original result. p.rep is based upon a 1 tailed probability value of the observed statistic. Other frequently called for statistics are estimates of the effect size, expressed either as Cohen's d, Hedges g, or the equivalent value of the correlation, r. For p.rep.t, if the cell sizes are unequal, the effect size estimates are adjusted by the ratio of the mean cell size to the harmonic mean cell size (see Rownow et al., 2000). } \value{ \item{p.rep }{Probability of replication} \item{dprime }{Effect size (Cohen`s d) if more than just p is specified} \item{prob}{Probability of F, t, or r. Note that this can be either the one-tailed or two tailed probability value.} \item{r.equivalent}{For t-tests, the r equivalent to the t (see Rosenthal and Rubin(2003), Rosnow, Rosenthal, and Rubin, 2000))}. } \references{ Cummings, Geoff (2005) Understanding the average probability of replication: comment on Killeen 2005). Psychological Science, 16, 12, 1002-1004). \cr Killeen, Peter H. (2005) An alternative to Null-Hypothesis Significance Tests. Psychological Science, 16, 345-353 \cr Rosenthal, R. and Rubin, Donald B.(2003), r-sub(equivalent): A Simple Effect Size Indicator. Psychological Methods, 8, 492-496. Rosnow, Ralph L., Rosenthal, Robert and Rubin, Donald B. (2000) Contrasts and correlations in effect-size estimation, Psychological Science, 11. 446-453. } \note{ The p.rep value is the one tailed probability value of obtaining a result in the same direction. } \examples{ p.rep(.05) #probability of replicating a result if the original study had a p = .05 p.rep.f(9.0,98) #probability of replicating a result with F = 9.0 with 98 df p.rep.r(.4,50) #probability of replicating a result if r =.4 with n = 50 p.rep.t(3,98) #probability of replicating a result if t = 3 with df =98 p.rep.t(2.14,84,14) #effect of equal sample sizes (see Rosnow et al.) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{models} \keyword{univar} psych/man/kappa.Rd0000644000176200001440000002227713400010604013534 0ustar liggesusers\name{cohen.kappa} \alias{wkappa} \alias{cohen.kappa} \title{Find Cohen's kappa and weighted kappa coefficients for correlation of two raters} \description{Cohen's kappa (Cohen, 1960) and weighted kappa (Cohen, 1968) may be used to find the agreement of two raters when using nominal scores. Light's kappa is just the average cohen.kappa if using more than 2 raters. weighted.kappa is (probability of observed matches - probability of expected matches)/(1 - probability of expected matches). Kappa just considers the matches on the main diagonal. Weighted kappa considers off diagonal elements as well. } \usage{ cohen.kappa(x, w=NULL,n.obs=NULL,alpha=.05,levels=NULL) wkappa(x, w = NULL) #deprectated } \arguments{ \item{x}{Either a two by n data with categorical values from 1 to p or a p x p table. If a data array, a table will be found.} \item{w}{A p x p matrix of weights. If not specified, they are set to be 0 (on the diagonal) and (distance from diagonal) off the diagonal)^2. } \item{n.obs}{Number of observations (if input is a square matrix.} \item{alpha}{Probability level for confidence intervals} \item{levels}{Specify the levels if some levels of x or y are completely missing. See Examples} } \details{When cateogorical judgments are made with two cateories, a measure of relationship is the phi coefficient. However, some categorical judgments are made using more than two outcomes. For example, two diagnosticians might be asked to categorize patients three ways (e.g., Personality disorder, Neurosis, Psychosis) or to categorize the stages of a disease. Just as base rates affect observed cell frequencies in a two by two table, they need to be considered in the n-way table (Cohen, 1960). Kappa considers the matches on the main diagonal. A penalty function (weight) may be applied to the off diagonal matches. If the weights increase by the square of the distance from the diagonal, weighted kappa is similar to an Intra Class Correlation (\code{\link{ICC}}). Derivations of weighted kappa are sometimes expressed in terms of similarities, and sometimes in terms of dissimilarities. In the latter case, the weights on the diagonal are 1 and the weights off the diagonal are less than one. In this case, if the weights are 1 - squared distance from the diagonal / k, then the result is similar to the ICC (for any positive k). cohen.kappa may use either similarity weighting (diagonal = 0) or dissimilarity weighting (diagonal = 1) in order to match various published examples. The input may be a two column data.frame or matrix with columns representing the two judges and rows the subjects being rated. Alternatively, the input may be a square n x n matrix of counts or proportion of matches. If proportions are used, it is necessary to specify the number of observations (n.obs) in order to correctly find the confidence intervals. The confidence intervals are based upon the variance estimates discussed by Fleiss, Cohen, and Everitt who corrected the formulae of Cohen (1968) and Blashfield. Some data sets will include data with numeric categories with some category values missing completely. In the sense that kappa is a measure of category relationship, this should not matter. But when finding weighted kappa, the number of categories weighted will be less than the number of categories potentially in the data. This can be remedied by specifying the levels parameter. This is a vector of the levels potentially in the data (even if some are missing). See the examples. If there are more than 2 raters, then the average of all raters is known as Light's kappa. (Conger, 1980). } \value{ \item{kappa }{Unweighted kappa} \item{weighted.kappa }{The default weights are quadratric.} \item{var.kappa}{Variance of kappa} \item{var.weighted}{Variance of weighted kappa} \item{n.obs}{number of observations} \item{weight}{The weights used in the estimation of weighted kappa} \item{confid}{The alpha/2 confidence intervals for unweighted and weighted kappa} \item{plevel}{The alpha level used in determining the confidence limits} } \references{ Banerjee, M., Capozzoli, M., McSweeney, L and Sinha, D. (1999) Beyond Kappa: A review of interrater agreement measures The Canadian Journal of Statistics / La Revue Canadienne de Statistique, 27, 3-23 Cohen, J. (1960). A coefficient of agreement for nominal scales. Educational and Psychological Measurement, 20 37-46 Cohen, J. (1968). Weighted kappa: Nominal scale agreement provision for scaled disagreement or partial credit. Psychological Bulletin, 70, 213-220. Conger, A. J. (1980) Integration and generalization of kappas for multiple raters, Psychological Bulletin,, 88, 322-328. Fleiss, J. L., Cohen, J. and Everitt, B.S. (1969) Large sample standard errors of kappa and weighted kappa. Psychological Bulletin, 72, 332-327. Light, R. J. (12971) Measures of response agreement for qualitative data: Some generalizations and alternatives, Psychological Bulletin, 76, 365-377. Zwick, R. (1988) Another look at interrater agreement. Psychological Bulletin, 103, 374 - 378. } \author{William Revelle } \note{As is true of many R functions, there are alternatives in other packages. The Kappa function in the vcd package estimates unweighted and weighted kappa and reports the variance of the estimate. The input is a square matrix. The ckappa and wkappa functions in the psy package take raw data matrices. The kappam.light function from the irr package finds Light's average kappa. To avoid confusion with Kappa (from vcd) or the kappa function from base, the function was originally named wkappa. With additional features modified from psy::ckappa to allow input with a different number of categories, the function has been renamed cohen.kappa. Unfortunately, to make it more confusing, the weights described by Cohen are a function of the reciprocals of those discucssed by Fleiss and Cohen. The cohen.kappa function uses the appropriate formula for Cohen or Fleiss-Cohen weights. There are some cases where the large sample size approximation of Fleiss et al. will produce confidence intervals exceeding +/- 1. Clearly, for these cases, the upper (or lower for negative values) should be set to 1. Boot strap resampling shows the problem is that the values are not symmetric. See the last (unrun) example. It is also possible to have more than 2 raters. In this case, cohen.kappa is reported for all pairs of raters (e.g. R1 and R2, R1 and R3, ... R3 and R4). To see the confidence intervals for these cohen.kappas, use the print command with the all=TRUE option. (See the exmaple of multiple raters.) } \examples{ #rating data (with thanks to Tim Bates) rater1 = c(1,2,3,4,5,6,7,8,9) # rater one's ratings rater2 = c(1,3,1,6,1,5,5,6,7) # rater one's ratings cohen.kappa(x=cbind(rater1,rater2)) #data matrix taken from Cohen cohen <- matrix(c( 0.44, 0.07, 0.09, 0.05, 0.20, 0.05, 0.01, 0.03, 0.06),ncol=3,byrow=TRUE) #cohen.weights weight differences cohen.weights <- matrix(c( 0,1,3, 1,0,6, 3,6,0),ncol=3) cohen.kappa(cohen,cohen.weights,n.obs=200) #cohen reports .492 and .348 #another set of weights #what if the weights are non-symmetric wc <- matrix(c( 0,1,4, 1,0,6, 2,2,0),ncol=3,byrow=TRUE) cohen.kappa(cohen,wc) #Cohen reports kw = .353 cohen.kappa(cohen,n.obs=200) #this uses the squared weights fleiss.cohen <- 1 - cohen.weights/9 cohen.kappa(cohen,fleiss.cohen,n.obs=200) #however, Fleiss, Cohen and Everitt weight similarities fleiss <- matrix(c( 106, 10,4, 22,28, 10, 2, 12, 6),ncol=3,byrow=TRUE) #Fleiss weights the similarities weights <- matrix(c( 1.0000, 0.0000, 0.4444, 0.0000, 1.0000, 0.6667, 0.4444, 0.6667, 1.0000),ncol=3) cohen.kappa(fleiss,weights,n.obs=200) #another example is comparing the scores of two sets of twins #data may be a 2 column matrix #compare weighted and unweighted #also look at the ICC for this data set. twins <- matrix(c( 1, 2, 2, 3, 3, 4, 5, 6, 6, 7), ncol=2,byrow=TRUE) cohen.kappa(twins) #data may be explicitly categorical x <- c("red","yellow","blue","red") y <- c("red", "blue", "blue" ,"red") xy.df <- data.frame(x,y) ck <- cohen.kappa(xy.df) ck ck$agree #Example for specifying levels #The problem of missing categories (from Amy Finnegan) #We need to specify all the categories possible using the levels option numbers <- data.frame(rater1=c(6,3,7,8,7), rater2=c(6,1,8,5,10)) cohen.kappa(numbers) #compare with the next analysis cohen.kappa(numbers,levels=1:10) #specify the number of levels # these leads to slightly higher weighted kappa #finally, input can be a data.frame of ratings from more than two raters ratings <- matrix(rep(1:5,4),ncol=4) ratings[1,2] <- ratings[2,3] <- ratings[3,4] <- NA ratings[2,1] <- ratings[3,2] <- ratings[4,3] <- 1 ck <- cohen.kappa(ratings) ck #just show the raw and weighted kappas print(ck, all=TRUE) #show the confidence intervals as well #In the case of confidence intervals being artificially truncated to +/- 1, it is #helpful to compare the results of a boot strap resample #ck.boot <-function(x,s=1:nrow(x)) {cohen.kappa(x[s,])$kappa} #library(boot) #ckb <- boot(x,ck.boot,R=1000) #hist(ckb$t) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{ multivariate } psych/man/withinBetween.Rd0000644000176200001440000000626213256544714015277 0ustar liggesusers\name{withinBetween} \alias{withinBetween} \docType{data} \title{An example of the distinction between within group and between group correlations} \description{A demonstration that a correlation may be decomposed to a within group correlation and a between group correlations and these two correlations are independent. Between group correlations are sometimes called ecological correlations, the decomposition into within and between group correlations is a basic concept in multilevel modeling. This data set shows the composite correlations between 9 variables, representing 16 cases with four groups. } \usage{data(withinBetween)} \format{ A data frame with 16 observations on the following 10 variables. \describe{ \item{\code{Group}}{An example grouping factor.} \item{\code{V1}}{A column of 16 observations } \item{\code{V2}}{A column of 16 observations } \item{\code{V3}}{A column of 16 observations } \item{\code{V4}}{A column of 16 observations } \item{\code{V5}}{A column of 16 observations } \item{\code{V6}}{A column of 16 observations } \item{\code{V7}}{A column of 16 observations } \item{\code{V8}}{A column of 16 observations } \item{\code{V9}}{A column of 16 observations } } } \details{Correlations between individuals who belong to different natural groups (based upon e.g., ethnicity, age, gender, college major,or country) reflect an unknown mixture of the pooled correlation within each group as well as the correlation of the means of these groups. These two correlations are independent and do not allow inferences from one level (the group) to the other level (the individual). This data set shows this independence. The within group correlations between 9 variables are set to be 1, 0, and -1 while those between groups are also set to be 1, 0, -1. These two sets of correlations are crossed such that V1, V4, and V7 have within group correlations of 1, as do V2, V5 and V8, and V3, V6 and V9. V1 has a within group correlation of 0 with V2, V5, and V8, and a -1 within group correlation with V3, V6 and V9. V1, V2, and V3 share a between group correlation of 1, as do V4, V5 and V6, and V7, V8 and V9. The first group has a 0 between group correlation with the second and a -1 with the third group. \code{\link{statsBy}} can decompose the observed correlation in the between and within correlations. \code{\link{sim.multilevel}} can produce similar data. } \source{The data were created for this example} \references{ P. D. Bliese. Multilevel modeling in R (2.3) a brief introduction to R, the multilevel package and the nlme package, 2009. Pedhazur, E.J. (1997) Multiple regression in behavioral research: explanation and prediction. Harcourt Brace. Revelle, W. An introduction to psychometric theory with applications in R (in prep) Springer. Draft chapters available at \url{https://personality-project.org/r/book/} } \seealso{ \code{\link{statsBy}}, \code{\link{describeBy}}, and \code{\link{sim.multilevel}} } \examples{ data(withinBetween) pairs.panels(withinBetween,bg=c("red","blue","white","black")[withinBetween[,1]], pch=21,ellipses=FALSE,lm=TRUE) stats <- statsBy(withinBetween,'Group') print(stats,short=FALSE) } \keyword{datasets} psych/man/ellipses.Rd0000644000176200001440000000570213464311571014273 0ustar liggesusers\name{ellipses} \alias{ellipses} \alias{minkowski} \title{Plot data and 1 and 2 sigma correlation ellipses} \description{For teaching correlation, it is useful to draw ellipses around the mean to reflect the correlation. This variation of the ellipse function from John Fox's car package does so. Input may be either two vectors or a matrix or data.frame. In the latter cases, if the number of variables >2, then the ellipses are done in the \code{\link{pairs.panels}} function. Ellipses may be added to existing plots. The minkowski function is included as a generalized ellipse. } \usage{ ellipses(x, y = NULL, add = FALSE, smooth=TRUE, lm=FALSE,data=TRUE, n = 2, span=2/3, iter=3, col = "red", xlab =NULL,ylab= NULL,size=c(1,2), ...) minkowski(r=2,add=FALSE,main=NULL,xl=1,yl=1) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{a vector,matrix, or data.frame } \item{y}{Optional second vector } \item{add}{Should a new plot be created, or should it be added to?} \item{smooth}{smooth = TRUE -> draw a loess fit} \item{lm}{lm=TRUE -> draw the linear fit} \item{data}{data=TRUE implies draw the data points} \item{n}{Should 1 or 2 ellipses be drawn } \item{span}{averaging window parameter for the lowess fit} \item{iter}{iteration parameter for lowess} \item{col}{color of ellipses (default is red} \item{xlab}{label for the x axis} \item{ylab}{label for the y axis} \item{size}{The size of ellipses in sd units (defaults to 1 and 2)} \item{\dots}{Other parameters for plotting} \item{r}{r=1 draws a city block, r=2 is a Euclidean circle, r > 2 tends towards a square} \item{main}{title to use when drawing Minkowski circles} \item{xl}{stretch the x axis} \item{yl}{stretch the y axis} } \details{Ellipse dimensions are calculated from the correlation between the x and y variables and are scaled as sqrt(1+r) and sqrt(1-r). They are then scaled as size[1] and size[2] standard deviation units. To scale for 95 and 99 percent confidence use c(1.64,2.32) } \value{ A single plot (for 2 vectors or data frames with fewer than 3 variables. Otherwise a call is made to \code{\link{pairs.panels}}. } \references{ Galton, Francis (1888), Co-relations and their measurement. Proceedings of the Royal Society. London Series, 45, 135-145. } \author{William Revelle } \note{Adapted from John Fox's ellipse and data.ellipse functions. } \seealso{ \code{\link{pairs.panels}} } \examples{ data(psychTools::galton) galton <- psychTools::galton ellipses(galton,lm=TRUE) ellipses(galton$parent,galton$child,xlab="Mid Parent Height", ylab="Child Height") #input are two vectors data(sat.act) ellipses(sat.act) #shows the pairs.panels ellipses minkowski(2,main="Minkowski circles") minkowski(1,TRUE) minkowski(4,TRUE) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{multivariate }% at least one, from doc/KEYWORDS \keyword{ hplot }% __ONLY ONE__ keyword per line psych/man/testRetest.Rd0000644000176200001440000002362714274761660014637 0ustar liggesusers\name{testRetest} \alias{testRetest} \alias{testReliability} \title{Find various test-retest statistics, including test, person and item reliability} \description{Given two presentations of a test, it is straightforward to find the test-retest reliablity, as well as the item reliability and person stability across items. Using the multi-level structure of the data, it is also possible to do a variance deomposition to find variance components for people, items, time, people x time, people x items, and items x time as well as the residual variance. This leads to various generalizability cofficients. } \usage{ testRetest(t1,t2=NULL,keys=NULL,id="id", time= "time", select=NULL, check.keys=TRUE, warnings=TRUE,lmer=TRUE,sort=TRUE) } \arguments{ \item{t1}{a data.frame or matrix for the first time of measurement.} \item{t2}{a data.frame or matrix for the second time of measurement. May be NULL if time is specifed in t1} \item{keys}{item names (or locations) to analyze, preface by "-" to reverse score. } \item{id}{subject identification codes to match across time} \item{time}{The name of the time variable identifying time 1 or 2 if just one data set is supplied. } \item{select}{A subset of items to analyze} \item{check.keys}{If TRUE will automatically reverse items based upon their correlation with the first principal component. Will throw a warning when doing so, but some people seem to miss this kind of message.} \item{warnings}{If TRUE, then warn when items are reverse scored} \item{lmer}{If TRUE, include the lmer variance decomposition. By default, this is true, but this can lead to long times for large data sets. } \item{sort}{If TRUE, the data are sorted by id and time. This allows for random ordering of data, but will fail if ids are duplicated in different studies. In that case, we need to add a constant to the ids for each study. See the last example.} } \details{There are many ways of measuring reliability. Test - Retest is one way. If the time interval is very short (or immediate), this is known as a dependability correlation, if the time interval is longer, a stability coefficient. In all cases, this is a correlation between two measures at different time points. Given the multi-level nature of these data, it is possible to find variance components associated with individuals, time, item, and time by item, etc. This leads to several different estimates of reliability (see \code{\link{multilevel.reliability}} for a discussion and references). It is also possible to find the subject reliability across time (this is the correlation across the items at time 1 with time 2 for each subject). This is a sign of subject reliability (Wood et al, 2017). Items can show differing amounts of test-retest reliability over time. Unfortunately, the within person correlation has problems if people do not differ very much across items. If all items are in the same keyed direction, and measuring the same construct, then the response profile for an individual is essentially flat. This implies that the even with almost perfect reproducibility, that the correlation can actually be negative. The within person distance (d2) across items is just the mean of the squared differences for each item. Although highly negatively correlated with the rqq score, this does distinguish between random responders (high dqq and low rqq) from consistent responders with lower variance (low dqq and low rqq). Several individual statistics are reported in the scores object. These can be displayed by using \code{\link{pairs.panels}} for a graphic display of the relationship and ranges of the various measures. Although meant to decompose the variance for tests with items nested within tests, if just given two tests, the variance components for people and for time will also be shown. The resulting variance ratio of people to total variance is the intraclass correlation between the two tests. See also \code{\link{ICC}} for the more general case. } \value{ \item{r12}{The time 1 time 2 correlation of scaled scores across time} \item{alpha}{Guttman's lambda 3 (aka alpha) and lambda 6* (item reliabilities based upon smcs) are found for the scales at times 1 and 2.} \item{rqq}{The within subject test retest reliability of response patterns over items} \item{item.stats}{Item reliabilities, item loadings at time 1 and 2, item means at time 1 and time 2} \item{scores}{A data frame of principal component scores at time 1 and time 2, raw scores from time 1 and time 2, the within person standard deviation for time 1 and time 2, and the rqq and dqq scores for each subject. } \item{xy.df}{If given separate t1 and t2 data.frames, this is combination suitable for using \code{\link{multilevel.reliability}} } \item{key}{A key vector showing which items have been reversed} \item{ml}{The multilevel output} } \references{ Cattell, R. B. (1964). Validity and reliability: A proposed more basic set of concepts. Journal of Educational Psychology, 55(1), 1 - 22. doi: 10.1037/h0046462 Cranford, J. A., Shrout, P. E., Iida, M., Rafaeli, E., Yip, T., & Bolger, N. (2006). A procedure for evaluating sensitivity to within-person change: Can mood measures in diary studies detect change reliably? Personality and Social Psychology Bulletin, 32(7), 917-929. DeSimone, J. A. (2015). New techniques for evaluating temporal consistency. Organizational Research Methods, 18(1), 133-152. doi: 10.1177/1094428114553061 Revelle, W. and Condon, D. Reliability from alpha to omega: A tutorial. Psychological Assessment, 31 (12) 1395-1411. Revelle, W. (in preparation) An introduction to psychometric theory with applications in {R}. Springer. (Available online at \url{https://personality-project.org/r/book/}). Shrout, P. E., & Lane, S. P. (2012). Psychometrics. In Handbook of research methods for studying daily life. Guilford Press. Wood, D., Harms, P. D., Lowman, G. H., & DeSimone, J. A. (2017). Response speed and response consistency as mutually validating indicators of data quality in online samples. Social Psychological and Personality Science, 8(4), 454-464. doi: 10.1177/1948550617703168 } \author{ William Revelle } \note{ lmer=TRUE is the default and will do the variance decomposition using lmer. This will take some time. For 3032 cases with 10 items from the msqR and sai data set, this takes 92 seconds, but just .63 seconds if lmer = FALSE. For the 1895 subjects with repeated measures on the \code{\link[psychTools]{sai}}, it takes 85 seconds with lmer and .38 without out lmer. In the case of just two tests (no items specified), the item based statistics (alpha, rqq, item.stats, scores, xy.df) are not reported. Several examples are given. The first takes 200 cases from the \code{\link[psychTools]{sai}} data set. Subjects were given the \code{link[psychTools]{sai}} twice with an intervening mood manipulation (four types of short film clips, with or without placebo/caffeine). The test retest stability of the sai are based upon the 20 sai items. The second example compares the scores of the 10 sai items that overlap with 10 items from the \code{\link[psychTools]{msqR}} data set from the same study. \code{link[psychTools]{sai}} and \code{\link[psychTools]{msqR}} were given immediately after each other and although the format differs slightly, can be seen as measures of dependability. The third example considers the case of the Impulsivity scale from the Eysenck Personality Inventory. This is a nice example of how different estimates of reliability will differ. The alpha reliability is .51 but the test-retest correlation across several weeks is .71! That is to say, the items don't correlate very much with each other (alpha) but do with themselves across time (test-retest). The fourth example takes the same data and jumble the subjects (who are identified by their ids). This result should be the same as the prior result because the data are automatically sorted by id. Although typically, there are as many subjects at time 1 as time 2, testRetest will handle the case of a different number of subjects. The data are first sorted by time and id, and then those cases from time 1 that are matched at time 2 are analyzed. It is important to note that the subject id numbers must be unique. } \seealso{ \code{\link{alpha}}, \code{\link{omega}} \code{\link{scoreItems}}, \code{\link{cor2}} } \examples{ \donttest{ #for faster compiling, dont test #lmer set to FALSE for speed. #set lmer to TRUE to get variance components sai.xray <- subset(psychTools::sai,psychTools::sai$study=="XRAY") #The case where the two measures are identified by time #automatically reverses items but throws a warning stability <- testRetest(sai.xray[-c(1,3)],lmer=FALSE) stability #show the results #get a second data set sai.xray1 <- subset(sai.xray,sai.xray$time==1) msq.xray <- subset(psychTools::msqR, (psychTools::msqR$study=="XRAY") & (psychTools::msqR$time==1)) select <- colnames(sai.xray1)[is.element(colnames(sai.xray1 ),colnames(psychTools::msqR))] select <-select[-c(1:3)] #get rid of the id information #The case where the two times are in the form x, y dependability <- testRetest(sai.xray1,msq.xray,keys=select,lmer=FALSE) dependability #show the results #now examine the Impulsivity subscale of the EPI #use the epiR data set which includes epi.keys data("epiR",package="psychTools") #Imp <- selectFromKeys(epi.keys$Imp) #fixed temporarily with Imp <- c("V1", "V3", "V8", "V10","V13" ,"V22", "V39" , "V5" , "V41") imp.analysis <- testRetest(psychTools::epiR,select=Imp) #test-retest = .7, alpha=.51,.51 imp.analysis #demonstrate random ordering -- the results should be the same n.obs <- NROW(psychTools::epiR) set.seed(42) ss <- sample(n.obs,n.obs) temp.epi <- psychTools::epiR temp.epi <-char2numeric(temp.epi) #make the study numeric temp.epi$id <- temp.epi$id + 300*temp.epi$study random.epi <- temp.epi[ss,] random.imp.analysis <- testRetest(random.epi,select=Imp) } } \keyword{ multivariate }% use one of RShowDoc("KEYWORDS") \keyword{ models }% __ONLY ONE__ keyword per line psych/man/VSS.scree.Rd0000644000176200001440000000316314346224631014225 0ustar liggesusers\name{VSS.scree} \alias{VSS.scree} \alias{scree} \title{Plot the successive eigen values for a scree test} \description{Cattell's scree test is one of most simple ways of testing the number of components or factors in a correlation matrix. Here we plot the eigen values of a correlation matrix as well as the eigen values of a factor analysis. } \usage{ scree(rx,factors=TRUE,pc=TRUE,main="Scree plot",hline=NULL,add=FALSE,sqrt=FALSE) VSS.scree(rx, main = "scree plot",sqrt=FALSE) } \details{Among the many ways to choose the optimal number of factors is the scree test. A better function to show the scree as well as compare it to randomly parallel solutions is found found in \code{\link{fa.parallel}} Following a suggestion from Marco Del Giudice, I added the sqrt option for version 2.2.12. - } \arguments{ \item{rx}{ a correlation matrix or a data matrix. If data, then correlations are found using pairwise deletions. } \item{factors}{If true, draw the scree for factors } \item{pc}{If true, draw the scree for components} \item{hline}{if null, draw a horizontal line at 1, otherwise draw it at hline (make negative to not draw it)} \item{main}{ Title } \item{add}{Should multiple plots be drawn?} \item{sqrt}{If TRUE, take the sqrt of the eigen value before plotting} } \references{ \url{https://personality-project.org/r/vss.html}} \author{William Revelle } \seealso{ \code{\link{fa.parallel}} \code{\link{VSS.plot}}, \code{\link{ICLUST}}, \code{\link{omega}}} \examples{ scree(attitude) #VSS.scree(cor(attitude) } \keyword{ multivariate }% at least one, from doc/KEYWORDS \keyword{ hplot }% __ONLY ONE__ keyword per line psych/man/polychor.matrix.Rd0000644000176200001440000000352112216212621015600 0ustar liggesusers\name{polychor.matrix} \alias{polychor.matrix} \alias{Yule2poly.matrix} \alias{phi2poly.matrix} \alias{Yule2phi.matrix} \title{Phi or Yule coefficient matrix to polychoric coefficient matrix} \description{A set of deprecated functions that have replaced by \code{\link{Yule2tetra}} and \code{\link{Yule2phi}}. Some older correlation matrices were reported as matrices of Phi or of Yule correlations. That is, correlations were found from the two by two table of counts: \cr \tabular{lll}{ \tab a \tab b \cr \tab c \tab d \cr } Yule Q is (ad - bc)/(ad+bc). \cr With marginal frequencies of a+b, c+d, a+c, b+d. Given a square matrix of such correlations, and the proportions for each variable that are in the a + b cells, it is possible to reconvert each correlation into a two by two table and then estimate the corresponding polychoric correlation (using John Fox's polychor function. } \usage{ Yule2poly.matrix(x, v) #deprectated phi2poly.matrix(x, v) #deprectated Yule2phi.matrix(x, v) #deprectated } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{a matrix of phi or Yule coefficients } \item{v}{A vector of marginal frequencies } } \details{These functions call \code{\link{Yule2poly}}, \code{\link{Yule2phi}} or \code{\link{phi2poly}} for each cell of the matrix. See those functions for more details. See \code{\link{phi.demo}} for an example. } \value{A matrix of correlations } \author{ William Revelle} \examples{ #demo <- phi.demo() #compare the phi (lower off diagonal and polychoric correlations (upper off diagonal) #show the result from poly.mat #round(demo$tetrachoric$rho,2) #show the result from phi2poly #tetrachorics above the diagonal, phi below the diagonal #round(demo$phis,2) } \keyword{ models }% at least one, from doc/KEYWORDS \keyword{ multivariate }% __ONLY ONE__ keyword per line psych/man/mssd.Rd0000644000176200001440000000714713161505351013421 0ustar liggesusers\name{mssd} \alias{mssd} \alias{rmssd} \alias{autoR} \title{Find von Neuman's Mean Square of Successive Differences} \description{ Von Neuman et al. (1941) discussed the Mean Square of Successive Differences as a measure of variability that takes into account gradual shifts in mean. This is appropriate when studying errors in ballistics or variability and stability in mood when studying affect. For random data, this will be twice the variance, but for data with a sequential order and a positive autocorrelation, this will be much smaller. Since the mssd is just twice the variance - the autocorrelation, it is thus possible to also find the autocorrelation for a particular lag. } \usage{ mssd(x,group=NULL, lag = 1,na.rm=TRUE) rmssd(x,group=NULL, lag=1, na.rm=TRUE) autoR(x,group=NULL,lag=1,na.rm=TRUE,use="pairwise") } \arguments{ \item{x}{a vector, data.frame or matrix} \item{lag}{the lag to use when finding \code{\link{diff}} } \item{group}{A column of the x data.frame to be used for grouping} \item{na.rm}{Should missing data be removed?} \item{use}{How to handle missing data in autoR}. } \details{When examining multiple measures within subjects, it is sometimes useful to consider the variability of trial by trial observations in addition to the over all between trial variation. The Mean Square of Successive Differences (mssd) and root mean square of successive differences (rmssd) is just \eqn{\sigma^2 = \Sigma(x_i - x_{i+1})^2 /(n-lag) } Where n-lag is used because there are only n-lag cases. In the case of multiple subjects (groups) with multiple observations per subject (group), specify the grouping variable will produce output for each group. Similar functions are available in the matrixStats package. However, the varDiff function in that package is variance of the difference rather than the MeanSquare. This is just the variance and standard deviation applied to the result from the \code{\link{diff}} function. Perhaps useful when studying mood, the \code{\link{autoR}} function finds the autocorrelation for each item for the specified lag. It also returns the rmssd (root means square successive difference). This is done by finding the correlation of the lag data. } \value{ The average squared successive difference (mssd) and the square root of the average squared successive difference (rmssd). Note that this is not the same as the standard deviation of the lagged differences. } \references{ Jahng, Seungmin and Wood, Phillip K and Trull, Timothy J. Analysis of affective instability in ecological momentary assessment: Indices using successive difference and group comparison via multilevel modeling. Psychological methods (2008) 13, 354-375. Von Neumann, J., Kent, R., Bellinson, H., and Hart, B. (1941). The mean square successive difference. The Annals of Mathematical Statistics, pages 153-162. } \author{William Revelle} \seealso{See Also \code{\link{rmssd}} for the standard deviation or \code{\link{describe}} for more conventional statistics. \code{\link{describeBy}} and \code{\link{statsBy}} give group level statistics. See also \code{link{mlr}}, \code{link{mlreliability}}, \code{link{mlPlot}} for other ways of examining within person variability over time. } \examples{ t <- seq(-pi, pi, .1) trial <- 1: length(t) gr <- trial \%\% 8 c <- cos(t) ts <- sample(t,length(t)) cs <- cos(ts) x.df <- data.frame(trial,gr,t,c,ts,cs) rmssd(x.df) rmssd(x.df,gr) autoR(x.df,gr) describe(x.df) #pairs.panels(x.df) #mlPlot(x.df,grp="gr",Time="t",items=c(4:6)) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{ multivariate } \keyword{models} psych/man/plot.psych.Rd0000755000176200001440000001346714063003214014554 0ustar liggesusers\name{plot.psych} \alias{plot.psych} \alias{plot.poly} \alias{plot.irt} \alias{plot.residuals} \title{Plotting functions for the psych package of class ``psych"} \description{Combines several plotting functions into one for objects of class ``psych". This can be used to plot the results of \code{\link{fa}}, \code{\link{irt.fa}}, \code{\link{VSS}}, \code{\link{ICLUST}}, \code{\link{omega}}, \code{\link{factor.pa}}, or \code{\link{principal}}. } \usage{ \method{plot}{psych}(x,labels=NULL,...) \method{plot}{irt}(x,xlab,ylab,main,D,type=c("ICC","IIC","test"),cut=.3,labels=NULL, keys=NULL, xlim,ylim,y2lab,lncol="black",...) \method{plot}{poly}(x,D,xlab,ylab,xlim,ylim,main,type=c("ICC","IIC","test"),cut=.3,labels, keys=NULL,y2lab,lncol="black",...) \method{plot}{residuals}(x,main,type=c("qq","chi","hist","cor"),std, bad=4, numbers=TRUE, upper=FALSE,diag=FALSE,...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{The object to plot } \item{labels}{Variable labels} \item{xlab}{Label for the x axis -- defaults to Latent Trait} \item{ylab}{Label for the y axis} \item{xlim}{The limits for the x axis} \item{ylim}{Specify the limits for the y axis} \item{main}{Main title for graph} \item{type}{"ICC" plots items, "IIC" plots item information, "test" plots test information, defaults to IIC.,"qq" does a quantile plot,"chi" plots chi square distributions,"hist" shows the histogram,"cor" does a corPlot of the residuals. } \item{D}{The discrimination parameter} \item{cut}{Only plot item responses with discrimiantion greater than cut} \item{keys}{Used in plotting irt results from irt.fa.} \item{y2lab}{ylab for test reliability, defaults to "reliability"} \item{bad}{label the most 1.. bad items in residuals} \item{numbers}{if using the cor option in plot residuals, show the numeric values} \item{upper}{if using the cor option in plot residuals, show the upper off diagonal values} \item{diag}{if using the cor option in plot residuals, show the diagonal values} \item{std}{Standardize the resduals?} \item{lncol}{The color of the lines in the IRT plots. Defaults to all being black, but it is possible to specify lncol as a vector of colors to be used.} \item{...}{other calls to plot} } \details{Passes the appropriate values to plot. For plotting the results of \code{\link{irt.fa}}, there are three options: type = "IIC" (default) will plot the item characteristic respone function. type = "IIC" will plot the item information function, and type= "test" will plot the test information function. Note that plotting an irt result will call either plot.irt or plot.poly depending upon the type of data that were used in the original \code{\link{irt.fa}} call. These are calls to the generic plot function that are intercepted for objects of type "psych". More precise plotting control is available in the separate plot functions. plot may be used for psych objects returned from \code{\link{fa}}, \code{\link{irt.fa}}, \code{\link{ICLUST}}, \code{\link{omega}}, \code{\link{principal}} as well as \code{\link{plot.reliability}} . A "jiggle" parameter is available in the fa.plot function (called from plot.psych when the type is a factor or cluster. If jiggle=TRUE, then the points are jittered slightly (controlled by amount) before plotting. This option is useful when plotting items with identical factor loadings (e.g., when comparing hypothetical models). Objects from \code{\link{irt.fa}} are plotted according to "type" (Item informations, item characteristics, or test information). In addition, plots for selected items may be done if using the keys matrix. Plots of irt information return three invisible objects, a summary of information for each item at levels of the trait, the average area under the curve (the average information) for each item as well as where the item is most informative. If plotting multiple factor solutions in plot.poly, then main can be a vector of names, one for each factor. The default is to give main + the factor number. It is also possible to create irt like plots based upon just a scoring key and item difficulties, or from a factor analysis and item difficulties. These are not true IRT type analyses, in that the parameters are not estimated from the data, but are rather indications of item location and discrimination for arbitrary sets of items. To do this, find \code{\link{irt.stats.like}} and then plot the results. \code{\link{plot.residuals}} allows the user to graphically examine the residuals of models formed by \code{\link{fa}}, \code{\link{irt.fa}}, \code{\link{omega}}, as well as \code{\link{principal}} and display them in a number of ways. "qq" will show quantiles of standardized or unstandardized residuals, "chi" will show quantiles of the squared standardized or unstandardized residuals plotted against the expected chi square values, "hist" will draw the histogram of the raw or standardized residuals, and "cor" will show a corPlot of the residual correlations. } \value{Graphic output for factor analysis, cluster analysis and item response analysis. } \author{William Revelle } \note{ More precise plotting control is available in the separate plot functions. } \seealso{ \code{\link{VSS.plot}} and \code{\link{fa.plot}}, \code{\link{cluster.plot}}, \code{\link{fa}}, \code{\link{irt.fa}}, \code{\link{VSS}}, \code{\link{ICLUST}}, \code{\link{omega}}, \code{\link{principal}} or \code{\link{plot.reliability}} } \examples{ test.data <- Harman74.cor$cov f4 <- fa(test.data,4) plot(f4) plot(resid(f4)) plot(resid(f4),main="Residuals from a 4 factor solution",qq=FALSE) #not run #data(bfi) #e.irt <- irt.fa(bfi[11:15]) #just the extraversion items #plot(e.irt) #the information curves # ic <- iclust(test.data,3) #shows hierarchical structure plot(ic) #plots loadings # } \keyword{ multivariate } psych/man/dummy.code.Rd0000644000176200001440000000400613463071347014516 0ustar liggesusers\name{dummy.code} \alias{dummy.code} \title{Create dummy coded variables} \description{Given a variable x with n distinct values, create n new dummy coded variables coded 0/1 for presence (1) or absence (0) of each variable. A typical application would be to create dummy coded college majors from a vector of college majors. Can also combine categories by group. By default, NA values of x are returned as NA (added 10/20/17) } \usage{ dummy.code(x,group=NULL,na.rm=TRUE,top=NULL,min=NULL) } \arguments{ \item{x}{A vector to be transformed into dummy codes} \item{group}{A vector of categories to be coded as 1, all others coded as 0.} \item{na.rm}{If TRUE, return NA for all codes with NA in x} \item{top}{If specified, then just dummy code the top values, and make the rest NA} \item{min}{If specified, then dummy code all values >= min} } \details{When coding demographic information, it is typical to create one variable with multiple categorical values (e.g., ethnicity, college major, occupation). \code{\link{dummy.code}} will convert these categories into n distinct dummy coded variables. If there are many possible values (e.g., country in the SAPA data set) then specifying top will assign dummy codes to just a subset of the data. If using dummy coded variables as predictors, remember to use n-1 variables. If group is specified, then all values of x that are in group are given the value of 1, otherwise, 0. (Useful for combining a range of science majors into STEM or not. The example forms a dummy code of any smoking at all.) } \value{A matrix of dummy coded variables} \author{William Revelle} \examples{ new <- dummy.code(sat.act$education) new.sat <- data.frame(new,sat.act) round(cor(new.sat,use="pairwise"),2) #dum.smoke <- dummy.code(spi$smoke,group=2:9) #table(dum.smoke,spi$smoke) #dum.age <- dummy.code(round(spi$age/5)*5,top=5) #the most frequent five year blocks } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{ multivariate } \keyword{ models} psych/man/print.psych.Rd0000644000176200001440000000664013571051006014727 0ustar liggesusers\name{print.psych} \alias{print.psych} \alias{summary.psych} \title{ Print and summary functions for the psych class } \description{Give limited output (print) or somewhat more detailed (summary) for most of the functions in psych. } \usage{ \method{print}{psych}(x,digits=2,all=FALSE,cut=NULL,sort=FALSE,short=TRUE,lower=TRUE,signif=NULL,...) \method{summary}{psych}(object,digits=2,items=FALSE,...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{ Output from a psych function (e.g., factor.pa, omega,ICLUST, score.items, cluster.cor} \item{object}{Output from a psych function} \item{items}{items=TRUE (default) does not print the item whole correlations} \item{digits}{Number of digits to use in printing} \item{all}{if all=TRUE, then the object is declassed and all output from the function is printed} \item{cut}{Cluster loadings < cut will not be printed. For the factor analysis functions (fa and factor.pa etc.), cut defaults to 0, for ICLUST to .3, for omega to .2.} \item{sort}{Cluster loadings are in sorted order} \item{short}{Controls how much to print} \item{lower}{For square matrices, just print the lower half of the matrix} \item{signif}{If not NULL, a numeric value, show just signif number of leading digits for describe output} \item{...}{More options to pass to summary and print} } \details{Most of the psych functions produce too much output. print.psych and summary.psych use generic methods for printing just the highlights. To see what else is available, ask for the structure of the particular object: (str(theobject) ). Alternatively, to get complete output, unclass(theobject) and then print it. This may be done by using the all=TRUE option. As an added feature, if the promax function is applied to a factanal loadings matrix, the normal output just provides the rotation matrix. print.psych will provide the factor correlations. (Following a suggestion by John Fox and Uli Keller to the R-help list). The alternative is to just use the Promax function directly on the factanal object. } \value{ Various psych functions produce copious output. This is a way to summarize the most important parts of the output of the score.items, cluster.scores, and ICLUST functions. See those ( \code{\link{score.items}}, \code{\link{cluster.cor}}, \code{\link{cluster.loadings}}, or \code{\link{ICLUST}}) for details on what is produced. The signf option is available for the output from \code{\link{describe}} to adjust the number of digits shown for all columns. This is slightly different from what happens if you specify digits, which rounds all output to the number of digits. print(x,signif=3) will print just the 3 largest digits of x, which will frequently result in scientific notation for any column where that would be appropriate for at least one row. } \author{ William Revelle} \note{ See \code{\link{score.items}}, \code{\link{cluster.cor}}, \code{\link{cluster.loadings}}, or \code{\link{ICLUST}}for details on what is printed. } \examples{ data(bfi) keys.list <- list(agree=c(-1,2:5),conscientious=c(6:8,-9,-10), extraversion=c(-11,-12,13:15),neuroticism=c(16:20),openness = c(21,-22,23,24,-25)) keys <- make.keys(25,keys.list,item.labels=colnames(psychTools::bfi[1:25])) scores <- score.items(keys,psychTools::bfi[1:25]) scores summary(scores) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{ multivariate } psych/man/error.circles.Rd0000644000176200001440000001137213463642550015232 0ustar liggesusers\name{errorCircles} \alias{errorCircles} %- Also NEED an '\alias' for EACH other topic documented here. \title{Two way plots of means, error bars, and sample sizes} \description{Given a matrix or data frame, data, find statistics based upon a grouping variable and then plot x and y means with error bars for each value of the grouping variable. If the data are paired (e.g. by gender), then plot means and error bars for the two groups on all variables. } \usage{ errorCircles(x, y, data, ydata = NULL, group=NULL, paired = FALSE, labels = NULL, main = NULL, xlim = NULL, ylim = NULL, xlab = NULL, ylab = NULL,add=FALSE, pos = NULL, offset = 1, arrow.len = 0.2, alpha = 0.05, sd = FALSE, bars = TRUE, circles = TRUE, colors=NULL,col.arrows=NULL,col.text=NULL,circle.size=1, ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{The x variable (by name or number) to plot} \item{y}{The y variable (name or number) to plot} \item{data}{The matrix or data.frame to use for the x data} \item{ydata}{If plotting data from two data.frames, then the y variable of the ydata frame will be used.} \item{group}{If specified, then \code{\link{statsBy}} is called first to find the statistics by group} \item{paired}{If TRUE, plot all x and y variables for the two values of the grouping variable.} \item{labels}{Variable names} \item{main}{Main title for plot} \item{xlim}{xlim values if desired-- defaults to min and max mean(x) +/- 2 se} \item{ylim}{ylim values if desired -- defaults to min and max mean(y) +/- 2 se} \item{xlab}{label for x axis -- grouping variable 1} \item{ylab}{label for y axis -- grouping variable 2} \item{add}{If TRUE, add to the prior plot} \item{pos}{Labels are located where with respect to the mean? } \item{offset}{Labels are then offset from this location} \item{arrow.len}{ Arrow length } \item{alpha}{alpha level of error bars } \item{sd}{if sd is TRUE, then draw means +/- 1 sd)} \item{bars}{Should error.bars be drawn for both x and y } \item{circles}{Should circles representing the relative sample sizes be drawn?} \item{colors}{Plot the points using colors -- default is black} \item{col.text}{What color for the text labels (defaults to colors)} \item{col.arrows}{What color should the arrows and circles be? Defaults to colors} \item{circle.size}{A scaling parameter for the error.circles. Defaults to 1, but can be adjusted downwards to make them less instrusive.} \item{\dots}{ Other parameters for plot } } \details{ When visualizing the effect of an experimental manipulation or the relationship of multiple groups, it is convenient to plot their means as well as their confidence regions in a two dimensional space. The diameter of the enclosing circle (ellipse) scales as 1/sqrt(N) * the maximum standard error of all variables. That is to say, the area of the ellipse reflects sample size. } \value{If the group variable is specified, then the statistics from \code{\link{statsBy}} are (invisibly) returned. } \author{William Revelle} \note{Basically this is a combination (and improvement) of \code{\link{statsBy}} with \code{\link{error.crosses}}. Can also serve some of the functionality of \code{\link{error.bars.by}} (see the last example). } \seealso{\code{\link{statsBy}}, \code{\link{describeBy}}, \code{\link{error.crosses}}} \examples{ #BFI scores for males and females errorCircles(1:25,1:25,data=psychTools::bfi,group="gender",paired=TRUE,ylab="female scores", xlab="male scores",main="BFI scores by gender") abline(a=0,b=1) #drop the circles since all samples are the same sizes errorCircles(1:25,1:25,data=psychTools::bfi,group="gender",paired=TRUE,circles=FALSE, ylab="female scores",xlab="male scores",main="BFI scores by gender") abline(a=0,b=1) data(psychTools::affect) colors <- c("black","red","white","blue") films <- c("Sad","Horror","Neutral","Happy") affect.stats <- errorCircles("EA2","TA2",data=psychTools::affect[-c(1,20)], group="Film",labels=films, xlab="Energetic Arousal",ylab="Tense Arousal",ylim=c(10,22),xlim=c(8,20), pch=16,cex=2,colors=colors, main ="EA and TA pre and post affective movies") #now, use the stats from the prior run errorCircles("EA1","TA1",data=affect.stats,labels=films,pch=16,cex=2,colors=colors,add=TRUE) #show sample size with the size of the circles errorCircles("SATV","SATQ",sat.act,group="education") #Can also provide error.bars.by functionality errorCircles(2,5,group=2,data=sat.act,circles=FALSE,pch=16,colors="blue", ylim= c(200,800),main="SATV by education",labels="") #just do the breakdown and then show the points # errorCircles(3,5,group=3,data=sat.act,circles=FALSE,pch=16,colors="blue", # ylim= c(200,800),main="SATV by age",labels="",bars=FALSE) } \keyword{ multivariate } \keyword{ hplot } psych/man/SD.Rd0000644000176200001440000000305111645327156012761 0ustar liggesusers\name{SD} \alias{SD} \title{ Find the Standard deviation for a vector, matrix, or data.frame - do not return error if there are no cases } \description{Find the standard deviation of a vector, matrix, or data.frame. In the latter two cases, return the sd of each column. Unlike the sd function, return NA if there are no observations rather than throw an error. } \usage{ SD(x, na.rm = TRUE) #deprecated } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{a vector, data.frame, or matrix } \item{na.rm}{na.rm is assumed to be TRUE} } \details{Finds the standard deviation of a vector, matrix, or data.frame. Returns NA if no cases. Just an adaptation of the stats:sd function to return the functionality found in R < 2.7.0 or R >= 2.8.0 Because this problem seems to have been fixed, SD will be removed eventually.} \value{The standard deviation } \author{ William Revelle } \note{ Until R 2.7.0, sd would return a NA rather than an error if no cases were observed. SD brings back that functionality. Although unusual, this condition will arise when analyzing data with high rates of missing values. This function will probably be removed as 2.7.0 becomes outdated.} \seealso{ These functions use SD rather than sd: \code{\link{describe.by}}, \code{\link{skew}}, \code{\link{kurtosi}} } \examples{ data(attitude) apply(attitude,2,sd) #all complete attitude[,1] <- NA SD(attitude) #missing a column describe(attitude) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{ models } psych/man/irt.item.diff.rasch.Rd0000644000176200001440000000407711672536515016226 0ustar liggesusers\name{irt.item.diff.rasch} \alias{irt.item.diff.rasch} \alias{irt.discrim} \title{Simple function to estimate item difficulties using IRT concepts} \description{Steps toward a very crude and preliminary IRT program. These two functions estimate item difficulty and discrimination parameters. A better procedure is to use \code{\link{irt.fa}} or the ltm package. } \usage{ irt.item.diff.rasch(items) irt.discrim(item.diff,theta,items) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{items}{ a matrix of items } \item{item.diff}{a vector of item difficulties (found by irt.item.diff)} \item{theta}{ability estimate from irt.person.theta} } \details{Item Response Theory (aka "The new psychometrics") models individual responses to items with a logistic function and an individual (theta) and item difficulty (diff) parameter. irt.item.diff.rasch finds item difficulties with the assumption of theta=0 for all subjects and that all items are equally discriminating. irt.discrim takes those difficulties and theta estimates from \code{\link{irt.person.rasch}} to find item discrimination (beta) parameters. A far better package with these features is the ltm package. The IRT functions in the psych-package are for pedagogical rather than production purposes. They are believed to be accurate, but are not guaranteed. They do seem to be slightly more robust to missing data structures associated with SAPA data sets than the ltm package. The \code{\link{irt.fa}} function is also an alternative. This will find \code{\link{tetrachoric}} or \code{\link{polychoric}} correlations and then convert to IRT parameters using factor analysis (\code{\link{fa}}). } \value{a vector of item difficulties or item discriminations. } \author{William Revelle } \note{ Under development. Not recommended for public consumption. See \code{\link{irt.fa}} and \code{\link{score.irt}} for far better options. } \seealso{ \code{\link{irt.fa}}, \code{\link{irt.person.rasch}} } \keyword{ multivariate}% at least one, from doc/KEYWORDS \keyword{ models}% __ONLY ONE__ keyword per line psych/man/sim.anova.Rd0000644000176200001440000001117414510567213014345 0ustar liggesusers\name{sim.anova} \Rdversion{1.1} \alias{sim.anova} \title{Simulate a 3 way balanced ANOVA or linear model, with or without repeated measures. } \description{For teaching basic statistics, it is useful to be able to generate examples suitable for analysis of variance or simple linear models. sim.anova will generate the design matrix of three independent variables (IV1, IV2, IV3) with an arbitrary number of levels and effect sizes for each main effect and interaction. IVs can be either continuous or categorical and can have linear or quadratic effects. Either a single dependent variable or multiple (within subject) dependent variables are generated according to the specified model. The repeated measures are assumed to be tau equivalent with a specified reliability. } \usage{ sim.anova(es1 = 0, es2 = 0, es3 = 0, es12 = 0, es13 = 0, es23 = 0, es123 = 0, es11=0,es22=0, es33=0,n = 2,n1 = 2, n2 = 2, n3 = 2, within=NULL,r=.8,factors=TRUE,center = TRUE,std=TRUE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{es1}{Effect size of IV1} \item{es2}{Effect size of IV2} \item{es3}{Effect size of IV3} \item{es12}{Effect size of the IV1 x IV2 interaction} \item{es13}{Effect size of the IV1 x IV3 interaction} \item{es23}{Effect size of the IV2 x IV3 interaction} \item{es123}{Effect size of the IV1 x IV2 * IV3 interaction} \item{es11}{Effect size of the quadratric term of IV1} \item{es22}{Effect size of the quadratric term of IV2} \item{es33}{Effect size of the quadratric term of IV3} \item{n}{Sample size per cell (if all variables are categorical) or (if at least one variable is continuous), the total sample size} \item{n1}{Number of levels of IV1 (0) if continuous} \item{n2}{Number of levels of IV2} \item{n3}{Number of levels of IV3} \item{within}{if not NULL, then within should be a vector of the means of any repeated measures.} \item{r}{the correlation between the repeated measures (if they exist). This can be thought of as the reliablility of the measures.} \item{factors}{report the IVs as factors rather than numeric} \item{center}{center=TRUE provides orthogonal contrasts, center=FALSE adds the minimum value + 1 to all contrasts} \item{std}{Standardize the effect sizes by standardizing the IVs} } \details{A simple simulation for teaching about ANOVA, regression and reliability. A variety of demonstrations of the relation between anova and lm can be shown. The default is to produce categorical IVs (factors). For more than two levels of an IV, this will show the difference between the linear model and anova in terms of the comparisons made. The within vector can be used to add congenerically equivalent dependent variables. These will have intercorrelations (reliabilities) of r and means as specified as values of within. To demonstrate the effect of centered versus non-centering, make factors = center=FALSE. The default is to center the IVs. By not centering them, the lower order effects will be incorrect given the higher order interaction terms. } \value{y.df is a data.frame of the 3 IV values as well as the DV values. \item{IV1 ... IV3}{Independent variables 1 ... 3} \item{DV}{If there is a single dependent variable} \item{DV.1 ... DV.n}{If within is specified, then the n within subject dependent variables} } \author{William Revelle} \seealso{ The general set of simulation functions in the psych package \code{\link{sim}} } \examples{ set.seed(42) data.df <- sim.anova(es1=1,es2=-.5,es13=1) # two main effect and one interaction psych::describe(data.df) pairs.panels(data.df) #show how the design variables are orthogonal # data.df <- char2numeric(data.df,flag=FALSE) summary(lm(DV~IV1*IV2*IV3,data=data.df)) summary(aov(DV~IV1*IV2*IV3,data=data.df)) lmCor(DV~IV1*IV2*IV3,data=data.df, std=FALSE) set.seed(42) #demonstrate the effect of not centering the data on the regression data.df <- sim.anova(es1=1,es2=.5,es13=1,center=FALSE) # psych::describe(data.df) # #this one is incorrect, because the IVs are not centered data.df <- char2numeric(data.df,flag=FALSE) summary(lm(DV~IV1*IV2*IV3,data=data.df)) data.df <- char2numeric(data.df,flag=FALSE) summary(aov(DV~IV1*IV2*IV3,data=data.df)) #compare with the lm model #but lmCor by default zero centers which works lmCor(DV~IV1*IV2*IV3,data=data.df) #now examine multiple levels and quadratic terms set.seed(42) data.df <- sim.anova(es1=1,es13=1,n2=3,n3=4,es22=1) summary(lm(DV~IV1*IV2*IV3,data=data.df)) summary(aov(DV~IV1*IV2*IV3,data=data.df)) pairs.panels(data.df) # data.df <- sim.anova(es1=1,es2=-.5,within=c(-1,0,1),n=10) pairs.panels(data.df) } \keyword{models} \keyword{multivariate}% __ONLY ONE__ keyword per line psych/man/factor.stats.Rd0000644000176200001440000001544414472146571015100 0ustar liggesusers\name{factor.stats} \Rdversion{1.1} \alias{factor.stats} \alias{fa.stats} \title{Find various goodness of fit statistics for factor analysis and principal components } \description{Chi square and other goodness of fit statistics are found based upon the fit of a factor or components model to a correlation matrix. Although these statistics are normally associated with a maximum likelihood solution, they can be found for minimal residual (OLS), principal axis, or principal component solutions as well. Primarily called from within these functions, factor.stats can be used by itself. Measures of factorial adequacy and validity follow the paper by Grice, 2001. } \usage{ fa.stats(r=NULL,f,phi=NULL,n.obs=NA,np.obs=NULL,alpha=.05,fm=NULL, smooth=TRUE, coarse=TRUE) factor.stats(r=NULL,f,phi=NULL,n.obs=NA,np.obs=NULL,alpha=.1,fm=NULL, smooth=TRUE, coarse=TRUE) } \arguments{ \item{r}{A correlation matrix or a data frame of raw data} \item{f}{A factor analysis loadings matrix or the output from a factor or principal components analysis. In which case the r matrix need not be specified.} \item{phi}{A factor intercorrelation matrix if the factor solution was oblique.} \item{n.obs}{The number of observations for the correlation matrix. If not specified, and a correlation matrix is used, chi square will not be reported. Not needed if the input is a data matrix.} \item{np.obs}{The pairwise number of subjects for each pair in the correlation matrix. This is used for finding observed chi square.} \item{alpha}{alpha level of confidence intervals for RMSEA (twice the confidence at each tail)} \item{fm}{flag if components are being given statistics} \item{smooth}{Should the corelation matrix be smoothed before finding the stats} \item{coarse}{By default, find the coarse coded statistics.} } \details{Combines the goodness of fit tests used in \code{\link{fa}} and principal into one function. If the matrix is singular, will smooth the correlation matrix before finding the fit functions. Now will find the RMSEA (root mean square error of approximation) and the alpha confidence intervals similar to a SEM function. Also reports the root mean square residual. Chi square is found two ways. The first (STATISTIC) applies the goodness of fit test from Maximum Likelihood objective function (see below). This assumes multivariate normality. The second is the empirical chi square based upon the observed residual correlation matrix and the observed sample size for each correlation. This is found by summing the squared residual correlations time the sample size. } \value{ \item{fit}{How well does the factor model reproduce the correlation matrix. (See \code{\link{VSS}}, \code{\link{ICLUST}}, and \code{\link{principal}} for this fit statistic.} \item{fit.off}{how well are the off diagonal elements reproduced? This is just 1 - the relative magnitude of the squared off diagonal residuals to the squared off diagonal original values.} \item{dof}{Degrees of Freedom for this model. This is the number of observed correlations minus the number of independent parameters. Let n=Number of items, nf = number of factors then \cr \eqn{dof = n * (n-1)/2 - n * nf + nf*(nf-1)/2}{dof = n * (n-1)/2 - n * nf + nf*(nf-1)/2}} \item{objective}{value of the function that is minimized by maximum likelihood procedures. This is reported for comparison purposes and as a way to estimate chi square goodness of fit. The objective function is \cr \eqn{f = log(trace ((FF'+U2)^{-1} R) - log(|(FF'+U2)^{-1} R|) - n.items}{log(trace ((FF'+U2)^{-1} R) - log(|(FF'+U2)^-1 R|) - n.items}. } \item{STATISTIC}{If the number of observations is specified or found, this is a chi square based upon the objective function, f. Using the formula from \code{\link{factanal}}(which seems to be Bartlett's test) : \cr \eqn{\chi^2 = (n.obs - 1 - (2 * p + 5)/6 - (2 * factors)/3)) * f }{ chi^2 = (n.obs - 1 - (2 * p + 5)/6 - (2 * factors)/3)) * f } Note that this is different from the chi square reported by the sem package which seems to use \eqn{\chi^2 = (n.obs - 1 - (2 * p + 5)/6 - (2 * factors)/3)) * f }{chi^2 = (n.obs - 1 )* f } } \item{PVAL}{If n.obs > 0, then what is the probability of observing a chisquare this large or larger?} \item{Phi}{If oblique rotations (using oblimin from the GPArotation package or promax) are requested, what is the interfactor correlation.} \item{R2}{The multiple R square between the factors and factor score estimates, if they were to be found. (From Grice, 2001)} \item{r.scores}{The correlations of the factor score estimates, if they were to be found.} \item{weights}{The beta weights to find the factor score estimates} \item{valid}{The validity coffiecient of coarse coded (unit weighted) factor score estimates (From Grice, 2001)} \item{score.cor}{The correlation matrix of coarse coded (unit weighted) factor score estimates, if they were to be found, based upon the loadings matrix. Note that these are not the same as the correlation of the factor score estimates r.scores} \item{RMSEA}{The Root Mean Square Error of Approximation and the alpha confidence intervals. Based upon the chi square non-centrality parameter. This is found as \eqn{\sqrt{f/dof - 1(/-1)}}{sqrt((f/df - 1(N-1))} } \item{rms}{The empirically found square root of the squared residuals. This does not require sample size to be specified nor does it make assumptions about normality.} \item{crms}{While the rms uses the number of correlations to find the average, the crms uses the number of degrees of freedom. Thus, there is a penalty for having too complex a model.} } \author{William Revelle} \references{ Grice, James W.,2001, Computing and evaluating factor scores, Psychological Methods, 6,4, 430-450. } \note{The problem of factor and factor score estimates leads to multiple different estimates of the correlations between the factors. Phi is the factor intercorrelation matrix from the rotations, r.scores is the correlation of the factor score estimates (if they were to be found from the data), score.cor is the correlation of the coarse coded factor score estimates, (if they were to be found). and of course the correlation of the factor score estimates themselves. By default, the first three of these are found. } \seealso{ \code{\link{fa}} with fm="pa" for principal axis factor analysis, \code{\link{fa}} with fm="minres" for minimum residual factor analysis (default). \code{\link{factor.pa}} also does principal axis factor analysis, but is deprecated, as is \code{\link{factor.minres}} for minimum residual factor analysis. See \code{\link{principal}} for principal components. } \examples{ v9 <- sim.hierarchical() f3 <- fa(v9,3) factor.stats(v9,f3,n.obs=500) f3o <- fa(v9,3,fm="pa",rotate="Promax") factor.stats(v9,f3o,n.obs=500) } \keyword{ multivariate } \keyword{ models}% __ONLY ONE__ keyword per line psych/man/bassAckward.Rd0000644000176200001440000002062414537110341014672 0ustar liggesusers\name{bassAckward} \alias{bassAckward} \alias{bassAckward.diagram} \title{The Bass-Ackward factoring algorithm discussed by Goldberg } \description{ Goldberg (2006) described a hierarchical factor structure organization from the ``top down". The original idea was to do successive factor analyses from 1 to nf factors organized by factor score correlations from one level to the next. Waller (2007) discussed a simple way of doing this for components without finding the scores. Using the factor correlations (from Gorsuch) to organize factors hierarchically results may be organized at many different levels. The algorithm may be applied to principal components (pca) or to true factor analysis. } \usage{ bassAckward(r, nfactors = 1, fm = "minres", rotate = "oblimin", scores = "tenBerge", adjust=TRUE, plot=TRUE,cut=.3, use = "pairwise", cor = "cor", weight = NULL, correct = 0.5,...) bassAckward.diagram(x,digits=2,cut = .3,labels=NULL,marg=c(1.5,.5,1.0,.5), main="BassAckward",items=TRUE,sort=TRUE,lr=TRUE,curves=FALSE,organize=TRUE, values=FALSE,...) } \arguments{ \item{r}{A correlation matrix or a data matrix suitable for factoring} \item{nfactors}{Factors from 1 to nfactors will be extracted. If nfactors is a a vector, then just the number of factors specified in the vector will be extracted. (See examples). } \item{fm}{Factor method. The default is 'minres' factoring. Although to be consistent with the original Goldberg article, we can also do principal components (fm ="pca"). } \item{rotate}{What type of rotation to apply. The default for factors is oblimin. Unlike the normal call to pca where the default is varimax, in bassAckward the default for pca is oblimin.} \item{scores}{What factor scoring algorithm should be used. The default is "tenBerge", other possibilities include "regression", or "bartlett"} \item{adjust}{If using any other scoring proceure that "tenBerge" should we adjust the correlations for the lack of factor score fit?} \item{plot}{By default draw a bassAckward diagram} \item{use}{How to treat missing data. Use='pairwise" finds pairwise complete correlations. } \item{cor}{What kind of correlation to find. The default is Pearson. } \item{weight}{Should cases be weighted? Default, no. } \item{correct}{If finding tetrachoric or polychoric correlations, what correction should be applied to empty cells (defaults to .5) } \item{x}{The object returned by bassAckward} \item{digits}{Number of digits to display on each path} \item{cut}{Values greater than the abs(cut) will be displayed in a path diagram.} \item{labels}{Labels may be taken from the output of the bassAckward function or can be specified as a list.} \item{marg}{Margins are set to be slightly bigger than normal to allow for a cleaner diagram} \item{main}{The main title for the figure} \item{items}{if TRUE, show the items associated with the factors} \item{sort}{if TRUE, sort the items by factor loadings} \item{lr}{Should the graphic be drawn left to right or top to bottom} \item{curves}{Should we show the correlations between factors at the same level} \item{organize}{Rename and sort the factors at two lowest levels for a more pleasing figure} \item{values}{If TRUE, then show the percent variance accounted for by this factor.} \item{...}{Other graphic parameters (e.g., cex)} } \details{ This is essentially a wrapper to the \code{\link{fa}} and \code{\link{pca}} combined with the \code{\link{faCor}} functions. They are called repeatedly and then the weights from the resulting solutions are used to find the factor/component correlations. Although the default is do all factor solutions from 1 to the nfactors, this can be simplified by specifying just some of the factor solutions. Thus, for the 135 items of the spi, it is more reasonable to ask for 3,5, and 27 item solutions. The function \code{\link{bassAckward.diagram}} may be called using the \code{\link{diagram}} function or may be called directly. The output from \code{\link{bassAckward.diagram}} is the sorted factor structure suitable for using \code{\link{fa.lookup}}. Although not particularly pretty, it is possible to do Schmid-Leiman rotations at each level. Specify the rotation as rotate="schmid". } \value{ \item{Call}{Echo the call} \item{fm}{Echos the factor method used} \item{fa}{A list of the factor loadings at each level} \item{bass.ack}{A list of the factor correlations at each level} \item{summary}{The factors at each level} \item{sumnames}{Summary of the factor names} \item{labels}{Factor labels including items for each level} \item{r}{The correlation matrix analyzed} \item{Phi}{The factor correlations at each level} \item{fa}{The factor analysis loadings at each level, now includes Phi values} \item{fa.vac}{The variance accounted for by each factor at each level} } \references{ Goldberg, L.R. (2006) Doing it all Bass-Ackwards: The development of hierarchical factor structures from the top down. Journal of Research in Personality, 40, 4, 347-358. Gorsuch, Richard, (1983) Factor Analysis. Lawrence Erlebaum Associates. Revelle, William. (in prep) An introduction to psychometric theory with applications in R. Springer. Working draft available at \url{https://personality-project.org/r/book/} Waller, N. (2007), A general method for computing hierarchical component structures by Goldberg's Bass-Ackwards method, Journal of Research in Personality, 41, 4, 745-752, DOI: 10.1016/j.jrp.2006.08.005 } \author{William Revelle} \note{Goldberg calculated factor/component scores and then correlated these. Waller suggests just looking at the unrotated components and then examining the correlations when rotating different numbers of components. I do not follow the Waller procedure, but rather find successive factors and then find factor/component correlations following Gorsuch. It is important to note that the BassAckward solution is not a hierarchical solution in the standard meaning. The factors are not factors of factors as is found in a hierarchical model (e.g. \code{\link{sim.hierarchical}} or \code{\link{omega}}, but is merely way of organising solutions with a different number of factors. In each case, unlike \code{\link{omega}} the factors are of the original variables, not the lower level factors. Thus, detailed statistics for any level of the hierarchy may be found by doing a factoring with that particular number of factors. To find basic statistics for the multiple factorings, examine the fa object. For more detail just do the factoring at that level. To see the items associated with the lowest level factors, use \code{\link{fa.lookup}}. For the items associated with other levels, use \code{\link{fa.lookup}} specifying the level. (See examples.) } \seealso{ \code{\link{fa}}, \code{\link{pca}}, \code{\link{omega}} and \code{\link{iclust}} for alternative hierarchical solutions. \code{link{fa.lookup}} to show the items in the lowest level of the solution. } \examples{ bassAckward(Thurstone,4,main="Thurstone data set") f.labels <- list(level1=cs(Approach,Avoid),level2=cs(PosAffect,NegAffect,Constraint), level3 = cs(Extraversion,Agreeableness,Neuroticism,Conscientiousness,Openness)) ba <- bassAckward(psychTools::bfi[1:25],c(2,3,5),labels=f.labels, main="bfi data set from psychTools", values=TRUE) print(ba,short=FALSE) #show the items associated with the 5 level solution fa.lookup(ba,dictionary=psychTools::bfi.dictionary) #now show the items associated with the 3 level solution fa.lookup(ba$fa[[2]],dictionary=psychTools::bfi.dictionary) # compare the 3 factor solution to what get by extracting 3 factors directly f3 <- fa(psychTools::bfi[1:25],3) f3$loadings - ba$fa[[2]]$loadings # these are the same #do pca instead of factors just summarize, don't plot summary(bassAckward(psychTools::bfi[1:25],c(1,3,5,7),fm="pca", main="Components",plot=FALSE)) ##not run, but useful example \donttest{ f.labels <- list(level1 = cs(Neg,Pos,Constrain),level2 = cs(Extra,Neuro,Cons,Open,Agree), level3 = cs(attnseeking,sociability,impulsivity, charisma,sensationseek,emotexpr,humor,anxiety, emotstab,irritability,wellbeing,industry,order,author,honesty,perfect,easygoing, selfcontrol,conservatism,creativity,introspect,art, intellect,conform,adaptability,compassion,trust)) sp5 <- bassAckward(psychTools::spi[11:145], c(3,5,27),labels=f.labels, main="spi data set from psychTools") } } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{multivariate } \keyword{ models}psych/man/sim.item.Rd0000644000176200001440000001712314414305207014173 0ustar liggesusers\name{sim.item} \alias{sim.spherical} \alias{item.sim} \alias{sim.item} \alias{sim.dichot} \alias{item.dichot} \alias{sim.circ} \alias{circ.sim} \alias{con2cat} \title{Generate simulated data structures for circumplex, spherical, or simple structure } \description{Rotations of factor analysis and principal components analysis solutions typically try to represent correlation matrices as simple structured. An alternative structure, appealing to some, is a circumplex structure where the variables are uniformly spaced on the perimeter of a circle in a two dimensional space. Generating simple structure and circumplex data is straightforward, and is useful for exploring alternative solutions to affect and personality structure. A generalization to 3 dimensional (spherical) data is straightforward. } \usage{ sim.item(nvar = 72, nsub = 500, circum = FALSE, xloading = 0.6, yloading = 0.6, gloading = 0, xbias = 0, ybias = 0, categorical = FALSE, low = -3, high = 3, truncate = FALSE, threshold=NULL) sim.circ(nvar = 72, nsub = 500, circum = TRUE, xloading = 0.6, yloading = 0.6, gloading = 0, xbias = 0, ybias = 0, categorical = FALSE, low = -3, high = 3, truncate = FALSE, cutpoint = 0) sim.dichot(nvar = 72, nsub = 500, circum = FALSE, xloading = 0.6, yloading = 0.6, gloading = 0, xbias = 0, ybias = 0, low = 0, high = 0) item.dichot(nvar = 72, nsub = 500, circum = FALSE, xloading = 0.6, yloading = 0.6, gloading = 0, xbias = 0, ybias = 0, low = 0, high = 0) sim.spherical(simple=FALSE, nx=7,ny=12 ,nsub = 500, xloading =.55, yloading = .55, zloading=.55, gloading=0, xbias=0, ybias = 0, zbias=0,categorical=FALSE, low=-3,high=3,truncate=FALSE, threshold=NULL) con2cat(old,cuts=c(0,1,2,3),where) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{nvar}{ Number of variables to simulate } \item{nsub}{Number of subjects to simulate } \item{circum}{ circum=TRUE is circumplex structure, FALSE is simple structure} \item{simple}{simple structure or spherical structure in sim.spherical} \item{xloading}{the average loading on the first dimension } \item{yloading}{Average loading on the second dimension } \item{zloading}{the average loading on the third dimension in sim.spherical} \item{gloading}{Average loading on a general factor (default=0)} \item{xbias}{To introduce skew, how far off center is the first dimension } \item{ybias}{To introduce skew on the second dimension} \item{zbias}{To introduce skew on the third dimension -- if using sim.spherical} \item{categorical}{ continuous or categorical variables. } \item{low}{ values less than low are forced to low (or 0 in item.dichot)} \item{high}{ values greater than high are forced to high (or 1 in item.dichot) } \item{cutpoint}{cut items at cutpoint (see threshold)} \item{truncate}{Change all values less than cutpoint to cutpoint. } \item{threshold}{A vector of cutpoints to conver continuous items to binary } \item{nx}{number of variables for the first factor in sim.spherical}y \item{ny}{number of variables for the second and third factors in sim.spherical} \item{old}{a matrix or data frame} \item{cuts}{Values of old to be used as cut points when converting continuous values to categorical values} \item{where}{Which columns of old should be converted to categorical variables. If missing, then all columns are converted.} } \details{This simulation was originally developed to compare the effect of skew on the measurement of affect (see Rafaeli and Revelle, 2005). It has been extended to allow for a general simulation of affect or personality items with either a simple structure or a circumplex structure. Items can be continuous normally distributed, or broken down into n categories (e.g, -2, -1, 0, 1, 2). Items can be distorted by limiting them to these ranges, even though the items have a mean of (e.g., 1). With the addition of a threshold parameter (replacing the previous cut parameter), each item will converted to a binary (0/1) value if the theta exceeds the threshold. If threshold is a vector with length less than nvar, then it will be filled out to length nvar by sampling with replacement. The addition of item.dichot allows for testing structures with dichotomous items of different difficulty (endorsement) levels. Two factor data with either simple structure or circumplex structure are generated for two sets of items, one giving a score of 1 for all items greater than the low (easy) value, one giving a 1 for all items greater than the high (hard) value. The default values for low and high are 0. That is, all items are assumed to have a 50 percent endorsement rate. To examine the effect of item difficulty, low could be -1, high 1. This will lead to item endorsements of .84 for the easy and .16 for the hard. Within each set of difficulties, the first 1/4 are assigned to the first factor factor, the second to the second factor, the third to the first factor (but with negative loadings) and the fourth to the second factor (but with negative loadings). It is useful to compare the results of sim.item with sim.hierarchical. sim.item will produce a general factor that runs through all the items as well as two orthogonal factors. This produces a data set that is hard to represent with standard rotation techniques. Extracting 3 factors without rotation and then rotating the 2nd and 3rd factors reproduces the correct solution. But simple oblique rotation of 3 factors, or an \code{\link{omega}} analysis do not capture the underlying structure. See the last example. Yet another structure that might be appealing is fully complex data in three dimensions. That is, rather than having items representing the circumference of a circle, items can be structured to represent equally spaced three dimensional points on a sphere. \code{\link{sim.spherical}} produces such data. } \value{ A data matrix of (nsub) subjects by (nvar) variables. } \references{ Variations of a routine used in Rafaeli and Revelle, 2006; Rafaeli, E. & Revelle, W. (2006). A premature consensus: Are happiness and sadness truly opposite affects? Motivation and Emotion. \url{https://personality-project.org/revelle/publications/rafaeli.revelle.06.pdf} Acton, G. S. and Revelle, W. (2004) Evaluation of Ten Psychometric Criteria for Circumplex Structure. Methods of Psychological Research Online, Vol. 9, No. 1 (formerly (https://www.dgps.de/fachgruppen/methoden/mpr-online/issue22/mpr110_10.pdf) also at \url{https://personality-project.org/revelle/publications/acton.revelle.mpr110_10.pdf} } \author{ William Revelle } \seealso{ See also the implementation in this series of functions to generate numerous data structures. \code{\link{simCor}}, \code{\link{simulation.circ}}, \code{\link{circ.tests}} as well as other simulations ( \code{\link{sim.structural}} \code{\link{sim.hierarchical}})} \examples{ round(cor(circ.sim(nvar=8,nsub=200)),2) plot(fa(circ.sim(16,500),2)$loadings,main="Circumplex Structure") #circumplex structure # # plot(fa(item.sim(16,500),2)$loadings,main="Simple Structure") #simple structure # cluster.plot(fa(item.dichot(16,low=0,high=1),2)) set.seed(42) data <- mnormt::rmnorm(1000, c(0, 0), matrix(c(1, .5, .5, 1), 2, 2)) #continuous data new <- con2cat(data,c(-1.5,-.5,.5,1.5)) #discreet data polychoric(new) #not run #x12 <- sim.item(12,gloading=.6) #f3 <- fa(x12,3,rotate="none") #f3 #observe the general factor #oblimin(f3$loadings[,2:3]) #show the 2nd and 3 factors. #f3 <- fa(x12,3) #now do it with oblimin rotation #f3 # not what one naively expect. } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{ multivariate} \keyword{datagen} psych/man/ICC.Rd0000644000176200001440000002314314210442000013026 0ustar liggesusers\name{ICC} \alias{ICC} \title{ Intraclass Correlations (ICC1, ICC2, ICC3 from Shrout and Fleiss) } \description{The Intraclass correlation is used as a measure of association when studying the reliability of raters. Shrout and Fleiss (1979) outline 6 different estimates, that depend upon the particular experimental design. All are implemented and given confidence limits. Uses either aov or lmer depending upon options. lmer allows for missing values. } \usage{ ICC(x,missing=TRUE,alpha=.05,lmer=TRUE,check.keys=FALSE) } \arguments{ \item{x}{a matrix or dataframe of ratings} \item{missing}{if TRUE, remove missing data -- work on complete cases only (aov only)} \item{alpha}{The alpha level for significance for finding the confidence intervals} \item{lmer}{Should we use the lmer function from lme4? This handles missing data and gives variance components as well. TRUE by default.} \item{check.keys}{If TRUE reverse those items that do not correlate with total score. This is not done by default.} } \details{Shrout and Fleiss (1979) consider six cases of reliability of ratings done by k raters on n targets. McGraw and Wong (1996) consider 10, 6 of which are identical to Shrout and Fleiss and 4 are conceptually different but use the same equations as the 6 in Shrout and Fleiss. The intraclass correlation is used if raters are all of the same ``class". That is, there is no logical way of distinguishing them. Examples include correlations between pairs of twins, correlations between raters. If the variables are logically distinguishable (e.g., different items on a test), then the more typical coefficient is based upon the inter-class correlation (e.g., a Pearson r) and a statistic such as \code{\link{alpha}} or \code{\link{omega}} might be used. alpha and ICC3k are identical. Where the data are laid out in terms of Rows (subjects) and Columns (rater or tests), the various ICCs are found by the ratio of various estimates of variance components. In all cases, subjects are taken as varying at random, and the residual variance is also random. The distinction between models 2 and 3 is whether the judges (items/tests) are seen as random or fixed. A further distinction is whether the emphasis is upon absolute agreement of the judges, or merely consistency. As discussed by Liljequist et al. (2019), McGraw and Wong lay out 5 models which use just three forms of the ICC equations. Model 1 is a one way model with ICC1: Each target is rated by a different judge and the judges are selected at random. \deqn{ ICC(1,1) = \rho_{1,1} = \frac{\sigma^2_r}{\sigma^2_r + \sigma^2_w} } (This is a one-way ANOVA fixed effects model and is found by (MSB- MSW)/(MSB+ (nr-1)*MSW)) ICC2: A random sample of k judges rate each target. The measure is one of absolute agreement in the ratings. \deqn{ ICC(2,1) = \rho_{2,1} = \frac{\sigma^2_r}{\sigma^2_r + \sigma^2_c +\sigma^2_{rc} + \sigma^2_e} } Found as (MSB- MSE)/(MSB + (nr-1)*MSE + nr*(MSJ-MSE)/nc) ICC3: A fixed set of k judges rate each target. There is no generalization to a larger population of judges. \deqn{ ICC(3,1) = \rho_{3,1} = \frac{\sigma^2_r}{\sigma^2_r + \sigma^2_c + \sigma^2_e} } (MSB - MSE)/(MSB+ (nr-1)*MSE) Then, for each of these cases, is reliability to be estimated for a single rating or for the average of k ratings? (The 1 rating case is equivalent to the average intercorrelation, the k rating case to the Spearman Brown adjusted reliability.) ICC1 is sensitive to differences in means between raters and is a measure of absolute agreement. ICC2 and ICC3 remove mean differences between judges, but are sensitive to interactions of raters by judges. The difference between ICC2 and ICC3 is whether raters are seen as fixed or random effects. ICC1k, ICC2k, ICC3K reflect the means of k raters. If using the lmer option, then missing data are allowed. In addition the lme object returns the variance decomposition. (This is simliar to \code{\link{testRetest}} which works on the items from two occasions. The check.keys option by default reverses items that are negatively correlated with total score. A message is issued. } \value{ \item{results}{A matrix of 6 rows and 8 columns, including the ICCs, F test, p values, and confidence limits} \item{summary}{The anova summary table or the lmer summary table} \item{stats}{The anova statistics (converted from lmer if using lmer)} \item{MSW}{Mean Square Within based upon the anova} \item{lme}{The variance decomposition if using the lmer option} } \references{ Shrout, Patrick E. and Fleiss, Joseph L. Intraclass correlations: uses in assessing rater reliability. Psychological Bulletin, 1979, 86, 420-3428. McGraw, Kenneth O. and Wong, S. P. (1996), Forming inferences about some intraclass correlation coefficients. Psychological Methods, 1, 30-46. + errata on page 390. Liljequist David, Elfving Britt and Skavberg Kirsti (2019) Intraclass correlation-A discussion and demonstration of basic features. PLoS ONE 14(7): e0219854. https://doi.org/10.1371/journal. Revelle, W. (in prep) An introduction to psychometric theory with applications in R. Springer. (working draft available at \url{https://personality-project.org/r/book/} } \author{William Revelle } \note{The results for the Lower and Upper Bounds for ICC(2,k) do not match those of SPSS 9 or 10, but do match the definitions of Shrout and Fleiss. SPSS seems to have been using the formula in McGraw and Wong, but not the errata on p 390. They seem to have fixed it in more recent releases (15). Starting with psych 1.4.2, the confidence intervals are based upon (1-alpha)\% at both tails of the confidence interval. This is in agreement with Shrout and Fleiss. Prior to 1.4.2 the confidence intervals were (1-alpha/2)\%. However, at some point, this error slipped back again. It has been fixed in version 1.9.5 (5/21/19). However, following some very useful comments from Mijke Rhumtulla, I should divide by 2 after all. This has been fixed (again) 3/05/22 for version 2.2.2. An occasionally asked question: How do these 6 measures compare to the 10 ICCsreported by SPSS? The answer is that the 6 Shrout and Fleiss are the same as the 10 by McGraw and Wong. A very helpful discussion of the formulae used in the calculations is by Liljequist et al, 2019. Although M & W distinguish between two way random effects and two way mixed effects for consistency of a single rater, examining their formula show that both of these are S& F ICC(3,1). Similarly, the S&F ICC(2,1) is called both Two way random effects, absolute agreement and Two way mixed effects, absolute agreement by M&W. The same confusion occurs with multiple raters (3,k) are both two way random effects and mixed effects for consistency and (2,k) are both two way random and two way mixed absolute agreement. As M&W say "The importance of the random-fixed effects distinction is in its effect on the interpretation, but not calculation, of an ICC. Namely, when levels of the column factor are randomly sampled, one can generalize beyond one's data, but not when they are fixed. In either case, however, the value of the ICC is the same," (p 37) To quote from IBM support: ``Problem I'm using the SPSS RELIABILITY procedure to compute intraclass correlation coefficients (ICCs), and am trying to make sure I understand the correspondence between the measures available in SPSS and those discussed in Shrout and Fleiss. How do the two sets of models relate to each other? Resolving The Problem Shrout and Fleiss (1979) discussed six ICC measures, which consist of pairs of measures for the reliability of a single rating and that of the average of k ratings (where k is the number of raters) for three different models: the one-way random model (called Case 1), the two-way random model (Case 2), and the two-way mixed model (Case 3). The measures implemented in SPSS were taken from McGraw and Wong (1996), who discussed these six measures, plus four others. The additional ones in McGraw and Wong are actually numerically identical to the four measures for the two-way cases discussed by Shrout and Fleiss, differing only in their interpretation. The correspondence between the measures available in SPSS and those discussed in Shrout and Fleiss is as follows: S&F(1,1) = SPSS One-Way Random Model, Absolute Agreement, Single Measure S&F(1,k) = SPSS One-Way Random Model, Absolute Agreement, Average Measure S&F(2,1) = SPSS Two-Way Random Model, Absolute Agreement, Single Measure S&F(2,k) = SPSS Two-Way Random Model, Absolute Agreement, Average Measure S&F(3,1) = SPSS Two-Way Mixed Model, Consistency, Single Measure S&F(3,k) = SPSS Two-Way Mixed Model, Consistency, Average Measure SPSS also offers consistency measures for the two-way random case (numerically equivalent to the consistency measures for the two-way mixed case, but differing in interpretation), and absolute agreement measures for the two-way mixed case (numerically equivalent to the absolute agreement measures for the two-way random case, again differing in interpretation) } \examples{ sf <- matrix(c( 9, 2, 5, 8, 6, 1, 3, 2, 8, 4, 6, 8, 7, 1, 2, 6, 10, 5, 6, 9, 6, 2, 4, 7),ncol=4,byrow=TRUE) colnames(sf) <- paste("J",1:4,sep="") rownames(sf) <- paste("S",1:6,sep="") sf #example from Shrout and Fleiss (1979) ICC(sf,lmer=FALSE) #just use the aov procedure #data(sai) sai <- psychTools::sai sai.xray <- subset(sai,(sai$study=="XRAY") & (sai$time==1)) xray.icc <- ICC(sai.xray[-c(1:3)],lmer=TRUE,check.keys=TRUE) xray.icc xray.icc$lme #show the variance components as well } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{ multivariate } psych/man/Tucker.Rd0000644000176200001440000000305611312472367013711 0ustar liggesusers\name{Tucker} \alias{Tucker} \docType{data} \title{ 9 Cognitive variables discussed by Tucker and Lewis (1973) } \description{Tucker and Lewis (1973) introduced a reliability coefficient for ML factor analysis. Their example data set was previously reported by Tucker (1958) and taken from Thurstone and Thurstone (1941). The correlation matrix is a 9 x 9 for 710 subjects and has two correlated factors of ability: Word Fluency and Verbal. } \usage{data(Tucker)} \format{ A data frame with 9 observations on the following 9 variables. \describe{ \item{\code{t42}}{Prefixes} \item{\code{t54}}{Suffixes} \item{\code{t45}}{Chicago Reading Test: Vocabulary} \item{\code{t46}}{Chicago Reading Test: Sentences} \item{\code{t23}}{First and last letters} \item{\code{t24}}{First letters} \item{\code{t27}}{Four letter words} \item{\code{t10}}{Completion} \item{\code{t51}}{Same or Opposite} } } \details{The correlation matrix from Tucker (1958) was used in Tucker and Lewis (1973) for the Tucker-Lewis Index of factoring reliability. } \source{ Tucker, Ledyard (1958) An inter-battery method of factor analysis, Psychometrika, 23, 111-136. } \references{ L.~Tucker and C.~Lewis. (1973) A reliability coefficient for maximum likelihood factor analysis. Psychometrika, 38(1):1--10. F.~J. Floyd and K.~F. Widaman. (1995) Factor analysis in the development and refinement of clinical assessment instruments., Psychological Assessment, 7(3):286 -- 299. } \examples{ data(Tucker) fa(Tucker,2,n.obs=710) omega(Tucker,2) } \keyword{datasets} psych/man/cta.Rd0000644000176200001440000001515314262121255013216 0ustar liggesusers\name{cta} \alias{cta} \alias{cta.15} \title{Simulate the C(ues) T(endency) A(ction) model of motivation} \description{Dynamic motivational models such as the Dynamics of Action (Atkinson and Birch, 1970, Revelle, 1986) may be reparameterized as a simple pair of differential (matrix) equations (Revelle, 1986, 2008). This function simulates the dynamic aspects of the CTA. The CTA model is discussed in detail in Revelle and Condon (2015). } \usage{ cta (n=3,t=5000, cues = NULL, act=NULL, inhibit=NULL,expect = NULL, consume = NULL, tendency = NULL,tstrength=NULL, type="both", fast=2,compare=FALSE,learn=TRUE,reward=NULL) cta.15(n = 3, t = 5000, cues = NULL,stim=NULL, act = NULL, inhibit = NULL, consume = NULL, ten = NULL, type = "both", fast = 2) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{n}{number of actions to simuate } \item{t}{length of time to simulate} \item{cues}{a vector of cue strengths} \item{stim}{A vector of the environmental stimuli} \item{act}{matrix of associations between cues and action tendencies} \item{inhibit}{inhibition matrix } \item{consume}{ Consummation matrix } \item{ten}{Initial values of action tendencies } \item{type}{show actions, tendencies, both, or state diagrams } \item{fast}{display every fast time (skips } \item{expect}{A matrix of expectations} \item{tendency}{starting values of tendencies} \item{tstrength}{a vector of starting value of tendencies} \item{compare}{Allows a two x two graph to compare two plots} \item{learn}{Allow the system to learn (self reinforce) over time} \item{reward}{The strength of the reward for doing an action} } \details{A very thorough discussion of the CTA model is available from Revelle (2008). An application of the model is discussed in Revelle and Condon (2015). \code{\link{cta.15}} is the version used to produce the figures and analysis in Revelle and Condon (2015). \code{\link{cta}} is the most recent version and includes a learning function developed in collaboration with Luke Smillie at the University of Melbourne. The dynamics of action (Atkinson and Birch, 1970) was a model of how instigating forces elicited action tendencies which in turn elicited actions. The basic concept was that action tendencies had inertia. That is, a wish (action tendency) would persist until satisfied and would not change without an instigating force. The consummatory strength of doing an action was thought in turn to reduce the action tendency. Forces could either be instigating or inhibitory (leading to "negaction"). Perhaps the simplest example is the action tendency (T) to eat a pizza. The instigating forces (F) to eat the pizza include the smell and look of the pizza, and once eating it, the flavor and texture. However, if eating the pizza, there is also a consummatory force (C) which was thought to reflect both the strength (gusto) of eating the pizza as well as some constant consummatory value of the activity (c). If not eating the pizza, but in a pizza parlor, the smells and visual cues combine to increase the tendency to eat the pizza. Once eating it, however, the consummatory effect is no longer zero, and the change in action tendency will be a function of both the instigating forces and the consummatory forces. These will achieve a balance when instigating forces are equal to the consummatory forces. The asymptotic strength of eating the pizza reflects this balance and does not require a ``set point" or ``comparator". To avoid the problems of instigating and consummatory lags and the need for a decision mechanism, it is possible to reparameterize the original DOA model in terms of action tendencies and actions (Revelle, 1986). Rather than specifying inertia for action tendencies and a choice rule of always expressing the dominant action tendency, it is useful to distinguish between action tendencies (t) and the actions (a) themselves and to have actions as well as tendencies having inertial properties. By separating tendencies from actions, and giving them both inertial properties, we avoid the necessity of a lag parameter, and by making the decision rule one of mutual inhibition, the process is perhaps easier to understand. In an environment which affords cues for action (c), cues enhance action tendencies (t) which in turn strengthen actions (a). This leads to two differential equations, one describing the growth and decay of action tendencies (t), the other of the actions themselves (a). \deqn{d{t} = {Sc} - { Ca} }{dt = Sc - Ca} and \deqn{d{a} = {Et} - {Ia}}{da = Et - Ia}. (See Revelle and Condon (2015) for an extensive discussion of this model.) \code{\link{cta}} simulates this model, with the addition of a learning parameter such that activities strengthen the connection between cues and tendencies. The learning part of the cta model is still under development. \code{\link{cta.15}} represents the state of the cta model as described in the Revelle and Condon (2015) article. } \value{ graphical output unless type="none" \item{cues}{echo back the cue input} \item{inhibition}{echo back the inhibitory matrix} \item{time}{time spent in each activity} \item{frequency}{Frequency of each activity} \item{tendencies}{average tendency strengths} \item{actions}{average action strength} } \references{ Atkinson, John W. and Birch, David (1970) The dynamics of action. John Wiley, New York, N.Y. Revelle, William (1986) Motivation and efficiency of cognitive performance in Brown, Donald R. and Veroff, Joe (ed). Frontiers of Motivational Psychology: Essays in honor of J. W. Atkinson. Springer. (Available as a pdf at \url{https://personality-project.org/revelle/publications/dynamicsofmotivation.pdf}.) Revelle, W. (2008) Cues, Tendencies and Actions. The Dynamics of Action revisted. \url{https://personality-project.org/revelle/publications/cta.pdf} Revelle, W. and Condon, D. (2015) A model for personality at three levels. Journal of Research in Personality \url{https://www.sciencedirect.com/science/article/pii/S0092656615000318} } \author{ William Revelle } \examples{ #not run #cta() #default values, running over time #cta(type="state") #default values, in a state space of tendency 1 versus tendency 2 #these next are examples without graphic output #not run #two introverts #c2i <- c(.95,1.05) #cta(n=2,t=10000,cues=c2i,type="none") #two extraverts #c2e <- c(3.95,4.05) #cta(n=2,t=10000,cues=c2e,type="none") #three introverts #c3i <- c(.95,1,1.05) #cta(3,t=10000,cues=c3i,type="none") #three extraverts #c3i <- c(3.95,4, 4.05) #cta(3,10000,c3i,type="none") #mixed #c3 <- c(1,2.5,4) #cta(3,10000,c3,type="none") } \keyword{ models } psych/man/structure.diagram.Rd0000644000176200001440000002360314524501125016110 0ustar liggesusers\name{structure.diagram} \alias{structure.diagram} \alias{structure.graph} \alias{structure.sem} \alias{lavaan.diagram} \alias{sem.diagram} \alias{sem.graph} \title{Draw a structural equation model specified by two measurement models and a structural model} \description{Graphic presentations of structural equation models are a very useful way to conceptualize sem and confirmatory factor models. Given a measurement model on x (xmodel) and on y (ymodel) as well as a path model connecting x and y (phi), draw the graph. If the ymodel is not specified, just draw the measurement model (xmodel + phi). If the Rx or Ry matrices are specified, show the correlations between the x variables, or y variables. Perhaps even more usefully, the function returns a model appropriate for running directly in the \emph{sem package} written by John Fox or the \emph{lavaan} package by Yves Rosseel. For this option to work directly, it is necessary to specfy that errrors=TRUE. Input can be specified as matrices or the output from \code{\link{fa}}, factanal, or a rotation package such as \emph{GPArotation}. For symbolic graphs, the input matrices can be character strings or mixtures of character strings and numeric vectors. As an option, for those without Rgraphviz installed, \code{\link{structure.sem}} will just create the sem model and skip the graph. (This functionality is now included in \code{\link{structure.diagram}}.) structure.diagram will draw the diagram without using Rgraphviz and is probably the preferred option. structure.graph will be removed eventually. \code{\link{lavaan.diagram}} will draw either cfa or sem results from the lavaan package. It has been tested for cfa, sem and mimic type output. It takes the output object from \emph{lavaan} and then calls \code{\link{structure.diagram}}. } \usage{ structure.diagram(fx, Phi=NULL,fy=NULL,labels=NULL,cut=.3,errors=FALSE,simple=TRUE, regression=FALSE,lr=TRUE,Rx=NULL,Ry=NULL,digits=1,e.size=.1, main="Structural model", ...) structure.graph(fx, Phi = NULL,fy = NULL, out.file = NULL, labels = NULL, cut = 0.3, errors=TRUE, simple=TRUE,regression=FALSE, size = c(8, 6), node.font = c("Helvetica", 14), edge.font = c("Helvetica", 10), rank.direction = c("RL", "TB", "LR", "BT"), digits = 1, title = "Structural model", ...) structure.sem(fx, Phi = NULL, fy = NULL,out.file = NULL, labels = NULL, cut = 0.3, errors=TRUE, simple=TRUE,regression=FALSE) lavaan.diagram(fit,main,e.size=.1,...) sem.diagram(fit,main="A SEM from the sem package",...) sem.graph(fit,out.file=NULL,main= "A SEM from the sem package",...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{fx}{a factor model on the x variables. } \item{Phi}{A matrix of directed relationships. Lower diagonal values are drawn. If the upper diagonal values match the lower diagonal, two headed arrows are drawn. For a single, directed path, just the value may be specified. } \item{fy}{a factor model on the y variables (can be empty) } \item{Rx}{The correlation matrix among the x variables} \item{Ry}{The correlation matrix among the y variables} \item{out.file}{name a file to send dot language instructions. } \item{labels}{variable labels if not specified as colnames for the matrices} \item{cut}{Draw paths for values > cut } \item{fit}{The output from a lavaan cfa or sem} \item{errors}{draw an error term for observerd variables } \item{simple}{Just draw one path per x or y variable } \item{regression}{Draw a regression diagram (observed variables cause Y)} \item{lr}{Direction of diagram is from left to right (lr=TRUE, default) or from bottom to top (lr=FALSE) } \item{e.size}{size of the ellipses in structure.diagram} \item{main}{main title of diagram} \item{size}{page size of graphic } \item{node.font}{ font type for graph } \item{edge.font}{font type for graph } \item{rank.direction}{ Which direction should the graph be oriented } \item{digits}{Number of digits to draw} \item{title}{ Title of graphic } \item{\dots}{ other options to pass to Rgraphviz } } \details{ The recommended function is structure.diagram which does not use Rgraphviz but which does not produce dot code either. All three structure function return a matrix of commands suitable for using in the sem or lavaan packages. (Specify errors=TRUE to get code that will run directly in the sem package.) The structure.graph output can be directed to an output file for post processing using the dot graphic language but requires that Rgraphviz is installed. lavaan.diagram will create sem, cfa, or mimic diagrams depending upon the lavaan input. sem.diagram and sem.graph convert the output from a simple CFA done with the sem package and draw them using structure.diagram or structure.graph. lavaan.diagram converts the output (fit) from a simple CFA done with the lavaan package and draws them using structure.diagram. The figure is organized to show the appropriate paths between: The correlations between the X variables (if Rx is specified) \cr The X variables and their latent factors (if fx is specified) \cr The latent X and the latent Y (if Phi is specified) \cr The latent Y and the observed Y (if fy is specified) \cr The correlations between the Y variables (if Ry is specified)\cr A confirmatory factor model would specify just fx and Phi, a structural model would include fx, Phi, and fy. The raw correlations could be shown by just including Rx and Ry. \code{\link{lavaan.diagram}} may be called from the \code{\link{diagram}} function which also will call \code{\link{fa.diagram}}, \code{\link{omega.diagram}} or \code{\link{iclust.diagram}}, depending upon the class of the fit. Other diagram functions include \code{\link{fa.diagram}}, \code{\link{omega.diagram}}. All of these functions use the various dia functions such as \code{\link{dia.rect}}, \code{\link{dia.ellipse}}, \code{\link{dia.arrow}}, \code{\link{dia.curve}}, \code{\link{dia.curved.arrow}}, and \code{\link{dia.shape}}. } \value{ \item{sem}{(invisible) a model matrix (partially) ready for input to John Fox's sem package. It is of class ``mod" for prettier output. } \item{lavaan}{(invisible) A model specification for the lavaan package.} \item{dotfile}{If out.file is specified, a dot language file suitable for using in a dot graphics program such as graphviz or Omnigraffle.} A graphic structural diagram in the graphics window } \author{William Revelle} \seealso{ \code{\link{fa.graph}}, \code{\link{omega.graph}}, \code{\link{sim.structural}} to create artificial data sets with particular structural properties.} \examples{ #A set of measurement and structural models #First set up the various matrices fx <- matrix(c(.9,.8,.7,rep(0,9), .6,.7,-.8,rep(0,9),.5,.6,.4),ncol=3) fy <- matrix(c(.9,.8,.6,rep(0,4),.6,.8,-.7),ncol=2) Phi <- matrix(c(1,.35,0,0,0, .35,1,.5,0,0, 0,.5, 1,0,0, .7,-.6, 0, 1,0, .0, 0, .4,0,1 ),ncol=5,byrow=TRUE) #now draw a number of models f1 <- structure.diagram(fx,main = "A measurement model for x") f2 <- structure.diagram(fx,Phi, main = "A measurement model for x") f3 <- structure.diagram(fy=fy, main = "A measurement model for y") f4 <- structure.diagram(fx,Phi,fy,main="A structural path diagram") f5 <- structure.diagram(fx,Phi,fy,main="A structural path diagram",errors=TRUE) #a mimic model fy <- matrix(c(.9,.8,.6,rep(0,4),.6,.8,-.7),ncol=2) fx <- matrix(c(.6,.5,0,.4),ncol=2) mimic <- structure.diagram(fx,fy=fy,simple=FALSE,errors=TRUE, main="A mimic diagram") fy <- matrix(c(rep(.9,8),rep(0,16),rep(.8,8)),ncol=2) structure.diagram(fx,fy=fy, e.size=.05) #symbolic input X2 <- matrix(c("a",0,0,"b","e1",0,0,"e2"),ncol=4) colnames(X2) <- c("X1","X2","E1","E2") phi2 <- diag(1,4,4) phi2[2,1] <- phi2[1,2] <- "r" f2 <- structure.diagram(X2,Phi=phi2,errors=FALSE,main="A symbolic model") #symbolic input with error X2 <- matrix(c("a",0,0,"b"),ncol=2) colnames(X2) <- c("X1","X2") phi2 <- diag(1,2,2) phi2[2,1] <- phi2[1,2] <- "r" f3 <- structure.diagram(X2,Phi=phi2,main="an alternative representation",e.size=.4) #and yet another one X6 <- matrix(c("a","b","c",rep(0,6),"d","e","f"),nrow=6) colnames(X6) <- c("L1","L2") rownames(X6) <- c("x1","x2","x3","x4","x5","x6") Y3 <- matrix(c("u","w","z"),ncol=1) colnames(Y3) <- "Y" rownames(Y3) <- c("y1","y2","y3") phi21 <- matrix(c(1,0,"r1",0,1,"r2",0,0,1),ncol=3) colnames(phi21) <- rownames(phi21) <- c("L1","L2","Y") f4 <- structure.diagram(X6,phi21,Y3) ###the following example is not run but is included to show how to work with lavaan \donttest{ library(lavaan) mod.1 <- 'A =~ A1 + A2 + A3 + A4 + A5 C =~ C1 + C2 + C3 + C4 + C5 E =~ E1 +E2 + E3 + E4 +E5' fit.1 <- sem(mod.1,psychTools::bfi[complete.cases(psychTools::bfi),],std.lv=TRUE) lavaan.diagram(fit.1) #a normal cfa #compare with f3 <- fa(psychTools::bfi[complete.cases(psychTools::bfi),1:15],3) fa.diagram(f3) #a sem model mod.2 <- 'A =~ A1 + A2 + A3 + A4 + A5 C =~ C1 + C2 + C3 + C4 + C5 E =~ E1 +E2 + E3 + E4 +E5 E ~ A + C ' fit.2 <- sem(mod.2,psychTools::bfi[complete.cases(psychTools::bfi),],std.lv=TRUE) lavaan.diagram(fit.2, cut=0,simple=FALSE,main="sem model") # A SEM Model #a mimic model mod.3 <- 'A =~ A1 + A2 + A3 + A4 + A5 C =~ C1 + C2 + C3 + C4 + C5 E =~ E1 +E2 + E3 + E4 +E5 A ~ age + gender C ~ age + gender E ~ age + gender' fit.3 <- sem(mod.3,psychTools::bfi[complete.cases(psychTools::bfi),],std.lv=TRUE) lavaan.diagram(fit.3, cut=0,simple=FALSE,main="mimic model", e.size=.03) } # and finally, a regression model X7 <- matrix(c("a","b","c","d","e","f"),nrow=6) f5 <- structure.diagram(X7,regression=TRUE,main = "Regression model") #and a really messy regession model x8 <- c("b1","b2","b3") r8 <- matrix(c(1,"r12","r13","r12",1,"r23","r13","r23",1),ncol=3) f6<- structure.diagram(x8,Phi=r8,regression=TRUE,main="Regression model") } \keyword{multivariate } \keyword{hplot } psych/man/super.matrix.Rd0000644000176200001440000000551414162446137015120 0ustar liggesusers\name{superMatrix} \alias{superMatrix} \alias{superCor} \alias{super.matrix} \title{Form a super matrix from two sub matrices. } \description{Given the matrices nXm, and jYk, form the super matrix of dimensions (n+j) and (m+k) with with elements x and y along the super diagonal. Useful when considering structural equations. The measurement models x and y can be combined into a larger measurement model of all of the variables. If either x or y is a list of matrices, then recursively form a super matrix of all of those elements. superCor will form a matrix from two matrices and the intercorrelation of the elements of the two. } \usage{ superMatrix(x,y) superCor(x,y=NULL, xy=NULL) super.matrix(x, y) #Deprecated } \arguments{ \item{x}{A n x m matrix or a list of such matrices, or the output object from \code{link{scoreOverlap}} } \item{y}{A j x k matrix or a list of such matrices} \item{xy}{A n x k matrix} } \details{Several functions, e.g., \code{\link{sim.structural}},\code{\link{structure.graph}}, \code{\link{make.keys}} use matrices that can be thought of as formed from a set of submatrices. In particular, when using \code{\link{make.keys}} in order to score a set of items (\code{\link{scoreItems}} or \code{\link{scoreOverlap}}) or to form specified clusters (\code{\link{cluster.cor}}), it is convenient to define different sets of scoring keys for different sets of items and to combine these scoring keys into one super key. When developing scales and examining items using \code{\link{bestScales}} it is sometimes helpful to combine the matrix output from \code{\link{scoreOverlap}} with the original correlation matrix. Thus, let x = correlation of the scales from \code{\link{scoreOverlap}}, y = the correlations of the items used to form the scales, and xy = the correlation of the scales with the items. } \value{ A (n+j) x (m +k) matrix with appropriate row and column names } \author{William Revelle} \seealso{ \code{\link{sim.structural}},\code{\link{structure.graph}}, \code{\link{make.keys}}} \examples{ mx <- matrix(c(.9,.8,.7,rep(0,4),.8,.7,.6),ncol=2) my <- matrix(c(.6,.5,.4)) colnames(mx) <- paste("X",1:dim(mx)[2],sep="") rownames(mx) <- paste("Xv",1:dim(mx)[1],sep="") colnames(my) <- "Y" rownames(my) <- paste("Yv",1:3,sep="") mxy <- superMatrix(mx,my) #show the use of a list to do this as well key1 <- make.keys(6,list(first=c(1,-2,3),second=4:6,all=1:6)) #make a scoring key key2 <- make.keys(4,list(EA=c(1,2),TA=c(3,4))) superMatrix(list(key1,key2)) r <- cor(bfi[1:15],use="pairwise") bfi.scores <- scoreOverlap(bfi.keys[1:2], r,select=FALSE) #note the select = FALSE R <- superCor(bfi.scores,r) lowerMat(R) #or to just get the scale correlations with the items R <- superCor(bfi.scores) round(R,2) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{multivariate } psych/man/logistic.Rd0000644000176200001440000000570412216170023014257 0ustar liggesusers\name{logistic} \alias{logistic} \alias{logit} \alias{logistic.grm} \title{Logistic transform from x to p and logit transform from p to x} \description{The logistic function (1/(1+exp(-x)) and logit function (log(p/(1-p)) are fundamental to Item Response Theory. Although just one line functions, they are included here for ease of demonstrations and in drawing IRT models. Also included is the logistic.grm for a graded response model.} \usage{ logistic(x,d=0, a=1,c=0, z=1) logit(p) logistic.grm( x,d=0,a=1.5,c=0,z=1,r=2,s=c(-1.5,-.5,.5,1.5)) } \arguments{ \item{x}{Any integer or real value } \item{d}{Item difficulty or delta parameter } \item{a}{The slope of the curve at x=0 is equivalent to the discrimination parameter in 2PL models or alpha parameter. Is either 1 in 1PL or 1.702 in 1PN approximations. } \item{c}{Lower asymptote = guessing parameter in 3PL models or gamma } \item{z}{The upper asymptote --- in 4PL models} \item{p}{Probability to be converted to logit value} \item{r}{The response category for the graded response model} \item{s}{The response thresholds} } \details{These three functions are provided as simple helper functions for demonstrations of Item Response Theory. The one parameter logistic (1PL) model is also known as the Rasch model. It assumes items differ only in difficulty. 1PL, 2PL, 3PL and 4PL curves may be drawn by choosing the appropriate d (delta or item difficulty), a (discrimination or slope), c (gamma or guessing) and z (zeta or upper asymptote). logit is just the inverse of logistic. logistic.grm will create the responses for a graded response model for the rth category where cutpoints are in s. } \value{ \item{p}{logistic returns the probability associated with x} \item{x}{logit returns the real number associated with p} } \author{ William Revelle} \examples{ curve(logistic(x,a=1.702),-3,3,ylab="Probability of x", main="Logistic transform of x",xlab="z score units") #logistic with a=1.702 is almost the same as pnorm curve(pnorm(x),add=TRUE,lty="dashed") curve(logistic(x),add=TRUE) text(2,.8, expression(alpha ==1)) text(2,1.0,expression(alpha==1.7)) curve(logistic(x),-4,4,ylab="Probability of x", main = "Logistic transform of x in logit units",xlab="logits") curve(logistic(x,d=-1),add=TRUE) curve(logistic(x,d=1),add=TRUE) curve(logistic(x,c=.2),add=TRUE,lty="dashed") text(1.3,.5,"d=1") text(.3,.5,"d=0") text(-1.5,.5,"d=-1") text(-3,.3,"c=.2") #demo of graded response model curve(logistic.grm(x,r=1),-4,4,ylim=c(0,1),main="Five level response scale", ylab="Probability of endorsement",xlab="Latent attribute on logit scale") curve(logistic.grm(x,r=2),add=TRUE) curve(logistic.grm(x,r=3),add=TRUE) curve(logistic.grm(x,r=4),add=TRUE) curve(logistic.grm(x,r=5),add=TRUE) text(-2.,.5,1) text(-1.,.4,2) text(0,.4,3) text(1.,.4,4) text(2.,.4,5) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{multivariate} psych/man/r.test.Rd0000644000176200001440000001616014527736002013673 0ustar liggesusers\name{r.test} \alias{r.test} \title{Tests of significance for correlations} \description{Tests the significance of a single correlation, the difference between two independent correlations, the difference between two dependent correlations sharing one variable (Williams's Test), or the difference between two dependent correlations with different variables (Steiger Tests). } \usage{ r.test(n, r12, r34 = NULL, r23 = NULL, r13 = NULL, r14 = NULL, r24 = NULL, n2 = NULL,pooled=TRUE, twotailed = TRUE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{n}{Sample size of first group } \item{r12}{Correlation to be tested} \item{r34}{Test if this correlation is different from r12, if r23 is specified, but r13 is not, then r34 becomes r13 } \item{r23}{if ra = r(12) and rb = r(13) then test for differences of dependent correlations given r23} \item{r13}{implies ra =r(12) and rb =r(34) test for difference of dependent correlations } \item{r14}{implies ra =r(12) and rb =r(34) } \item{r24}{ ra =r(12) and rb =r(34)} \item{n2}{n2 is specified in the case of two independent correlations. n2 defaults to n if if not specified } \item{pooled}{use pooled estimates of correlations} \item{twotailed}{ should a twotailed or one tailed test be used } } \details{Depending upon the input, one of four different tests of correlations is done. 1) For a sample size n, find the t value for a single correlation where \deqn{t = \frac{r * \sqrt(n-2)}{\sqrt(1-r^2)} }{t = r* sqrt(n-2)/sqrt(1-r^2) } and \deqn{se = \sqrt{\frac{1-r^2}{n-2}}) }{se = sqrt((1-r^2)/(n-2))}. 2) For sample sizes of n and n2 (n2 = n if not specified) find the z of the difference between the z transformed correlations divided by the standard error of the difference of two z scores: \deqn{z = \frac{z_1 - z_2}{\sqrt{\frac{1}{(n_1 - 3) + (n_2 - 3)}}}}{t = (z_1 - z_2) * sqrt(1/((n_1)-3 + (n_2-3)))}. 3) For sample size n, and correlations r12, r13 and r23 test for the difference of two dependent correlations (r12 vs r13). 4) For sample size n, test for the difference between two dependent correlations involving different variables. Consider the correlations from Steiger (1980), Table 1: Because these all from the same subjects, any tests must be of dependent correlations. For dependent correlations, it is necessary to specify at least 3 correlations (e.g., r12, r13, r23) \tabular{lrrrrrrr}{ Variable \tab M1 \tab F1 \tab V1 \tab M2 \tab F2 \tab V2 \cr M1 1.00 \cr F1 \tab .10 \tab 1.00\cr V1 \tab .40 \tab .50 \tab 1.00 \cr M2 \tab .70 \tab .05 \tab .50 \tab 1.00 \cr F2 \tab .05 \tab .70 \tab .50 \tab .50 \tab 1.00 \cr V2 \tab .45 \tab .50 \tab .80 \tab .50 \tab .60 \tab 1.00 \cr } For clarity, correlations may be specified by value. If specified by location and if doing the test of dependent correlations, if three correlations are specified, they are assumed to be in the order r12, r13, r23. Consider the examples from Steiger: Case A: where Masculinity at time 1 (M1) correlates with Verbal Ability .5 (r12), femininity at time 1 (F1) correlates with Verbal ability r13 =.4, and M1 correlates with F1 (r23= .1). Then, given the correlations: r12 = .4, r13 = .5, and r23 = .1, t = -.89 for n =103, i.e., r.test(n=103, r12=.4, r13=.5,r23=.1) Case B: Test whether correlation between two variables (e.g., F and V) is the same over time (e.g. F1V1 = F2V2) r.test(n = 103, r12 = 0.5, r34 = 0.6, r23 = 0.5, r13 = 0.7, r14 = 0.5, r24 = 0.8) } \value{ \item{test}{Label of test done} \item{z}{z value for tests 2 or 4} \item{t}{t value for tests 1 and 3} \item{p}{probability value of z or t} } \references{ Cohen, J. and Cohen, P. and West, S.G. and Aiken, L.S. (2003) Applied multiple regression/correlation analysis for the behavioral sciences, L.Erlbaum Associates, Mahwah, N.J. Olkin, I. and Finn, J. D. (1995). Correlations redux. Psychological Bulletin, 118(1):155-164. Steiger, J.H. (1980), Tests for comparing elements of a correlation matrix, Psychological Bulletin, 87, 245-251. Williams, E.J. (1959) Regression analysis. Wiley, New York, 1959. } \author{William Revelle } \note{Steiger specifically rejects using the Hotelling T test to test the difference between correlated correlations. Instead, he recommends Williams' test. (See also Dunn and Clark, 1971). These tests follow Steiger's advice. The test of two independent correlations is just a z test of the difference of the Fisher's z transformed correlations divided by the standard error of the difference. (See Cohen et al, p 49). One of the beautiful features of R is what works on single value works on vectors and matrices. Thus, r.test can be used to test the pairwise diference of all the elements of a correlation matrix. See the last example. By default, the probabilities are reported to 2 decimal places. This will, of course, sometimes lead to statements such as p < .1 when in fact p < .1001 or even more precisely p < .1000759. To achieve the higher precision, use a print statement with the preferred number of digits. See the next to last set of examples (courtesy of Julia Rohrer). } \seealso{ See also \code{\link{corTest}} which tests all the elements of a correlation matrix, and \code{\link{cortest.mat}} to compare two matrices of correlations. r.test extends the tests in \code{\link{paired.r}},\code{\link{r.con}}} \examples{ n <- 30 r <- seq(0,.9,.1) rc <- matrix(r.con(r,n),ncol=2) test <- r.test(n,r) r.rc <- data.frame(r=r,z=fisherz(r),lower=rc[,1],upper=rc[,2],t=test$t,p=test$p) round(r.rc,2) r.test(50,r) r.test(30,.4,.6) #test the difference between two independent correlations r.test(103,.4,.5,.1) #Steiger case A of dependent correlations r.test(n=103, r12=.4, r13=.5,r23=.1) #for complicated tests, it is probably better to specify correlations by name r.test(n=103,r12=.5,r34=.6,r13=.7,r23=.5,r14=.5,r24=.8) #steiger Case B ##By default, the precision of p values is 2 decimals #Consider three different precisions shown by varying the requested number of digits r12 = 0.693458895410494 r23 = 0.988475791500198 r13 = 0.695966022434845 print(r.test(n = 5105 , r12 = r12 , r23 = r23 , r13 = r13 )) #probability < 0.1 print(r.test(n = 5105 , r12 = r12, r23 = r23 , r13 = r13 ),digits=4) #p < 0.1001 print(r.test(n = 5105 , r12 = r12, r23 = r23 , r13 = r13 ),digits=8) #p< <0.1000759 #an example of how to compare the elements of two matrices R1 <- lowerCor(psychTools::bfi[1:200,1:5]) #find one set of Correlations R2 <- lowerCor(psychTools::bfi[201:400,1:5]) #and now another set sampled #from the same population test <- r.test(n=200, r12 = R1, r34 = R2) round(lowerUpper(R1,R2,diff=TRUE),digits=2) #show the differences between correlations #lowerMat(test$p) #show the p values of the difference between the two matrices adjusted <- p.adjust(test$p[upper.tri(test$p)]) both <- test$p both[upper.tri(both)] <- adjusted round(both,digits=2) #The lower off diagonal are the raw ps, the upper the adjusted ps } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{ multivariate }% at least one, from doc/KEYWORDS \keyword{ models }% __ONLY ONE__ keyword per line psych/man/sim.congeneric.Rd0000644000176200001440000000773714340716775015401 0ustar liggesusers\name{sim.congeneric} \alias{congeneric.sim} \alias{sim.congeneric} \alias{make.congeneric} \title{ Simulate a congeneric data set with or without minor factors } \description{Classical Test Theory (CTT) considers four or more tests to be congenerically equivalent if all tests may be expressed in terms of one factor and a residual error. Parallel tests are the special case where (usually two) tests have equal factor loadings. Tau equivalent tests have equal factor loadings but may have unequal errors. Congeneric tests may differ in both factor loading and error variances. Minor factors may be added as systematic but trivial disturbances } \usage{ sim.congeneric(loads = c(0.8, 0.7, 0.6, 0.5),N = NULL, err=NULL, short = TRUE, categorical=FALSE, low=-3,high=3,cuts=NULL,minor=FALSE,fsmall = c(-.2,.2)) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{N}{How many subjects to simulate. If NULL, return the population model } \item{loads}{ A vector of factor loadings for the tests } \item{err}{A vector of error variances -- if NULL then error = 1 - loading 2} \item{short}{short=TRUE: Just give the test correlations, short=FALSE, report observed test scores as well as the implied pattern matrix} \item{categorical}{ continuous or categorical (discrete) variables. } \item{low}{ values less than low are forced to low } \item{high}{ values greater than high are forced to high } \item{cuts}{If specified, and categorical = TRUE, will cut the resulting continuous output at the value of cuts} \item{minor}{Should n/2 minor factors be added (see Maccallum and Tucker, 1991)} \item{fsmall}{nvar/2 small factors are generated with loadings sampled from fsmall e.g. (-.2,0,.2)} } \details{When constructing examples for reliability analysis, it is convenient to simulate congeneric data structures. These are the most simple of item structures, having just one factor. Mainly used for a discussion of reliability theory as well as factor score estimates. Maccallum and Tucker (1991) suggest that factor models should include minor factors, that at not random error but unspecifed by the basic model. This option has been added in November, 2022. The implied covariance matrix is just pattern \%*\% t(pattern). } \value{ \item{model}{The implied population correlation matrix if N=NULL or short=FALSE, otherwise the sample correlation matrix} \item{pattern }{The pattern matrix implied by the loadings and error variances} \item{r}{The sample correlation matrix for long output} \item{observed}{a matrix of test scores for n tests} \item{latent}{The latent trait and error scores } } \references{Revelle, W. (in prep) An introduction to psychometric theory with applications in R. To be published by Springer. (working draft available at \url{https://personality-project.org/r/book/} MacCallum, R. C., & Tucker, L. R. (1991). Representing sources of error in the common-factormodel: Implications for theory and practice. Psychological Bulletin, 109(3), 502-511.} \author{ William Revelle } \seealso{ \code{\link{item.sim}} for other simulations, \code{\link{fa}} for an example of factor scores, \code{\link{irt.fa}} and \code{\link{polychoric}} for the treatment of item data with discrete values.} \examples{ test <- sim.congeneric(c(.9,.8,.7,.6)) #just the population matrix test <- sim.congeneric(c(.9,.8,.7,.6),N=100) # a sample correlation matrix test <- sim.congeneric(short=FALSE, N=100) round(cor(test$observed),2) # show a congeneric correlation matrix f1=fa(test$observed,scores=TRUE) round(cor(f1$scores,test$latent),2) #factor score estimates are correlated with but not equal to the factor scores set.seed(42) #500 responses to 4 discrete items items <- sim.congeneric(N=500,short=FALSE,low=-2,high=2,categorical=TRUE) d4 <- irt.fa(items$observed) #item response analysis of congeneric measures } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{ multivariate} \keyword{datagen} psych/man/fa.lookup.Rd0000644000176200001440000002037314551372356014360 0ustar liggesusers\name{fa.lookup} \alias{lookup} \alias{lookupItems} \alias{fa.lookup} \alias{item.lookup} \alias{itemSort} \alias{keys.lookup} \alias{lookupFromKeys} \alias{lmCorLookup} \title{A set of functions for factorial and empirical scale construction} \description{ When constructing scales through rational, factorial, or empirical means, it is useful to examine the content of the items that relate most highly to each other (e.g., the factor loadings of \code{\link{fa.lookup}} of a set of items), or to some specific set of criteria (e.g., \code{\link{bestScales}}). Given a dictionary of item content, these routines will sort by factor loading, item means, or criteria correlations and display the item content. } \usage{ lookup(x,y,criteria=NULL) lookupItems(content=NULL,dictionary=NULL,search=c("Item","Content","item")) fa.lookup(f,dictionary=NULL,digits=2,cut=.0,n=NULL,sort=TRUE) item.lookup(f,m, dictionary,cut=.3, digits = 2) itemSort(m, dictionary, ascending=TRUE, digits = 2) keys.lookup(keys.list,dictionary) lookupFromKeys(keys.list,dictionary,n=20,cors=NULL,sort=TRUE,suppress.names=FALSE, digits=2) lmCorLookup(x,dictionary=NULL,cut=0,digits=2,p=.05) } \arguments{ \item{x}{A data matrix or data frame depending upon the function.} \item{y}{A data matrix or data frame or a vector} \item{criteria}{Which variables (by name or location) should be the empirical target for bestScales and bestItems. May be a separate object. } \item{f}{The object returned from either a factor analysis (fa) or a principal components analysis (principal) } \item{content}{The word(s) to search for from a dictionary} \item{keys.list}{A list of scoring keys suitable to use for make.keys} \item{cut}{Return all values in abs(x[,c1]) > cut.} \item{n}{Return the n best items per factor (as long as they have their highest loading on that factor)} \item{cors}{If provided (e.g. from scoreItems) will be added to the lookupFromKeys output} \item{dictionary}{a data.frame with rownames corresponding to rownames in the f$loadings matrix or colnames of the data matrix or correlation matrix, and entries (may be multiple columns) of item content. See Notes for how to construct a dictionary.} \item{search}{Column names of dictionary to search, defaults to "Item" or "Content" (dictionaries have different labels for this column), can search any column specified by search.} \item{m}{A data frame of item means} \item{digits}{round to digits} \item{sort}{Should the factors be sorted first?} \item{suppress.names}{In lookupFromKeys, should we suppress the column labels} \item{p}{Show lmCor regressions with probability < p} \item{ascending}{order to sort the means in itemSort -- see dfOrder} } \details{ \code{\link{fa.lookup}} and \code{\link{lookup}} are simple helper functions to summarize correlation matrices or factor loading matrices. \code{\link{bestItems}} will sort the specified column (criteria) of x on the basis of the (absolute) value of the column. The return as a default is just the rowname of the variable with those absolute values > cut. If there is a dictionary of item content and item names, then include the contents as a two column (or more) matrix with rownames corresponding to the item name and then as many fields as desired for item content. (See the example dictionary \code{\link[psychTools]{bfi.dictionary}}). \code{\link{lookup}} is used by \code{\link{bestItems}} and will find values in c1 of y that match those in x. It returns those rows of y of that match x. Suppose that you have a "dictionary" of the many variables in a study but you want to consider a small subset of them in a data set x. Then, you can find the entries in the dictionary corresponding to x by lookup(rownames(x),y) If the column is not specified, then it will match by rownames(y). \code{\link{fa.lookup}} is used when examining the output of a factor analysis and one wants the corresponding variable names and contents. The returned object may then be printed in LaTex by using the \code{\link[psychTools]{df2latex}} function with the char option set to TRUE. \code{\link{fa.lookup}} will work with output from \code{\link{fa}}, \code{\link{pca}} or \code{\link{omega}}. For omega output, the items are sorted by the non-general factor loadings. Similarly, given a correlation matrix, r, of the x variables, if you want to find the items that most correlate with another item or scale, and then show the contents of that item from the dictionary, bestItems(r,c1=column number or name of x, contents = y) \code{\link{item.lookup}} combines the output from a factor analysis \code{\link{fa}} with simple descriptive statistics (a data frame of means) with a dictionary. Items are grouped by factor loadings > cut, and then sorted by item mean. This allows a better understanding of how a scale works, in terms of the meaning of the item endorsements. Note the means must be a one column matrix (with row names), not a vector (without rownames.) \code{\link{itemSort}} Combine item means and item content and then sort them by the item means. \code{\link{lookupItems}} searches a dictionary for all items that have a certain content. The rownames of the returned object are the item numbers which can then be used in other functions to find statistics (e.g. omega) of a scale with those items. If an scales by items correlation matrix is given, then the item correlation with that scale are also shown. } \value{ \code{\link{bestItems}} returns a sorted list of factor loadings or correlations with the labels as provided in the dictionary. \code{\link{lookup}} is a very simple implementation of the match function. \code{\link{fa.lookup}} takes a factor/cluster analysis object (or just a keys like matrix), sorts it using \code{\link{fa.sort}} and then matches by row.name to the corresponding dictionary entries. } \references{ Revelle, W. (in preparation) An introduction to psychometric theory with applications in {R}. Springer. (Available online at \url{https://personality-project.org/r/book/}). } \author{William Revelle} \note{Although empirical scale construction is appealing, it has the basic problem of capitalizing on chance. Thus, be careful of over interpreting the results unless working with large samples. Iteration and bootstrapping aggregation (bagging) gives information on the stability of the solutions. See \code{\link{bestScales}} To create a dictionary, create an object with row names as the item numbers, and the columns as the item content. See the bfi.dictionary in the psychTools package as an example. The bfi.dictionary was constructed from a spreadsheet with multiple columns, the first of which was the column names of the bfi. See the first (not run) example. } \seealso{ \code{\link{fa}}, \code{\link{iclust}},\code{\link{principal}}, \code{\link{bestScales}} and \code{\link{bestItems}} } \examples{ #Tne following shows how to create a dictionary #first, copy the spreadsheet to the clipboard #the spreadsheet should have multiple columns #col 1 col 2 col 3 #item content label #A1 Am indifferent to the feelings of others. (q_146) #A2 Inquire about others' well-being. (q_1162 #... # bfi.dictionary <- read.clipboard.tab() #read from the clipboard # rownames(bfi.dictionary) <- bfi.dictionary[1] #the first column had the names # bfi.dictionary <- bfi.dictionary[-1] #these are redundant, drop them f5 <- fa(psychTools::bfi,5) m <- colMeans(psychTools::bfi,na.rm=TRUE) item.lookup(f5,m,dictionary=psychTools::bfi.dictionary[2,drop=FALSE]) #just show the item content, not the source of the items fa.lookup(f5,dictionary=psychTools::bfi.dictionary[2]) #just show the means and the items #use the m vector we found above itemSort(as.matrix(m),dictionary=psychTools::bfi.dictionary[,2:3,drop=FALSE]) #show how to use lookupFromKeys bfi.keys <- list(agree=c("-A1","A2","A3","A4","A5"),conscientiousness=c("C1","C2","C3","-C4","-C5"), extraversion=c("-E1","-E2","E3","E4","E5"),neuroticism=c("N1","N2","N3","N4","N5"), openness = c("O1","-O2","O3","O4","-O5")) bfi.over <- scoreOverlap(bfi.keys,bfi) #returns the corrected for overlap values lookupFromKeys(bfi.keys,psychTools::bfi.dictionary,n=5, cors=bfi.over$item.cor) #show the keying information lookupItems("life",psychTools::spi.dictionary) #find those items with "life" in the item } \keyword{ models } \keyword{multivariate } psych/man/cattell.Rd0000644000176200001440000000433213407222067014077 0ustar liggesusers\name{cattell} \alias{cattell} \docType{data} \title{12 cognitive variables from Cattell (1963) } \description{ Rindskopf and Rose (1988) use this data set to demonstrate confirmatory second order factor models. It is a nice example data set to explore hierarchical structure and alternative factor solutions. It contains measures of fluid and crystallized intelligence. } \usage{data("cattell")} \format{ A correlation matrix of the following 12 variables from 277 7th and 8th graders \describe{ \item{Verbal}{A verbal ability test from Thurstone} \item{Verbal2}{A verbal ability test from Thurstone} \item{Space1}{A Spatial ability test from Thurstone} \item{Space2}{A Spatial ability test from Thurstone} \item{Reason1}{A reasoning test from Thurstone} \item{Reason2}{A reasoning test from Thurstone} \item{Number1}{A Numerical ability test from Thurstone} \item{Number2}{A Numerical ability test from Thurstone} \item{IPATSer}{A "culture fair" series from the IPAT} \item{IPATCLAS}{A "culture fair" classification test from the IPAT} \item{IPATMatr}{A "culture fair" matrix reasoning test from the IPAT} \item{IPATTop}{A "culture fair" topology test from the IPAT} } } \details{ Cattell (1963) reported on 8 cognitive variables from Thurstone and four from the Institute for Personality Assessment Test (IPAT). Rindskopf and Rose (1988) use this data set as an example of second order factor analysis. It is thus a nice set for examining alternative solutions such as bifactor rotation, \code{\link{omega}} hierarchical, as well as \code{\link{esem}} and \code{\link{interbattery}} factor analysis. } \source{ David Rindskopf and Tedd Rose, (1988) Some Theory and Applications of Confirmatory Second- Order Factor Analysis, Multivariate Behavioral Research, 23, 51-67.} \references{ Cattell, R. B. (1963).Theory of fluid and crystallized intelligence: A critical experiment. Journal of Educational Psychology, 54, 1-22. David Rindskopf and Tedd Rose, (1988) Some Theory and Applications of Confirmatory Second- Order Factor Analysis, Multivariate Behavioral Research, 23, 51-67. } \examples{ data(cattell) corPlot(cattell,numbers=TRUE,upper=FALSE,diag=FALSE, main="12 cognitive variables from Cattell (1963)",xlas=2) } \keyword{datasets} psych/man/table2matrix.Rd0000644000176200001440000000354113464313465015054 0ustar liggesusers\name{table2matrix} \alias{table2matrix} \alias{table2df} \title{ Convert a table with counts to a matrix or data.frame representing those counts.} \description{Some historical sets are reported as summary tables of counts in a limited number of bins. Transforming these tables to data.frames representing the original values is useful for pedagogical purposes. (E.g., transforming the original Galton table of height x cubits in order to demonstrate regression.) The column and row names must be able to be converted to numeric values. } \usage{ table2matrix(x, labs = NULL) table2df(x, count=NULL,labs = NULL) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{A two dimensional table of counts with row and column names that can be converted to numeric values. } \item{count}{if present, then duplicate each row count times} \item{labs}{Labels for the rows and columns. These will be used for the names of the two columns of the resulting matrix } } \details{The original Galton (1888) of heights by cubits (arm length) is in tabular form. To show this as a correlation or as a scatter plot, it is useful to convert the table to a matrix or data frame of two columns. This function may also be used to convert an item response pattern table into a data table. e.g., the Bock data set \code{\link{bock}}. } \value{A matrix (or data.frame) of sum(x) rows and two columns. } \author{William Revelle} \seealso{ \code{\link[psychTools]{cubits}} and \code{\link{bock}} data sets} \examples{ data(cubits) cubit <- table2matrix(psychTools::cubits,labs=c("height","cubit")) describe(cubit) ellipses(cubit,n=1) data(bock) responses <- table2df(bock.table[,2:6],count=bock.table[,7],labs= paste("lsat6.",1:5,sep="")) describe(responses) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{models} psych/man/fisherz.Rd0000644000176200001440000000475513122760703014130 0ustar liggesusers\name{fisherz} \alias{fisherz} \alias{fisherz2r} \alias{r.con}\alias{r2c} \alias{r2t} \alias{t2r} \alias{g2r} \alias{chi2r} \alias{r2chi} \alias{cor2cov} \title{Transformations of r, d, and t including Fisher r to z and z to r and confidence intervals} \description{Convert a correlation to a z or t, or d, or chi or covariance matrix or z to r using the Fisher transformation or find the confidence intervals for a specified correlation. r2d converts a correlation to an effect size (Cohen's d) and d2r converts a d into an r. g2r converts Hedge's g to a correlation. t2r converts a t test to r, r2t converts a correlation to a t-test value. chi2r converts a chi square to r, r2chi converts it back. r2c and cor2cov convert a correlation matrix to a covariance matrix. d2t and t2d convert cohen's d into a t and a t into a cohen d. See \code{\link{cohen.d}} for other conversions. } \usage{ fisherz(rho) fisherz2r(z) r.con(rho,n,p=.95,twotailed=TRUE) r2t(rho,n) t2r(t,df) g2r(g,df,n) chi2r(chi2,n) r2chi(rho,n) r2c(rho,sigma) cor2cov(rho,sigma) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{rho}{ a Pearson r } \item{z}{A Fisher z} \item{n}{Sample size for confidence intervals} \item{df}{degrees of freedom for t, or g} \item{p}{Confidence interval} \item{twotailed}{Treat p as twotailed p} \item{g}{An effect size (Hedge's g)} \item{t}{A student's t value} \item{chi2}{A chi square} \item{sigma}{a vector of standard deviations to be used to convert a correlation matrix to a covariance matrix} } \value{ \item{z}{ value corresponding to r (fisherz)} \item{r}{r corresponding to z (fisherz2r)} \item{r.con}{lower and upper p confidence intervals (r.con)} \item{t}{t with n-2 df (r2t)} \item{r}{r corresponding to effect size d or d corresponding to r.} \item{r2c}{r2c is the reverse of the cor2con function of base R. It just converts a correlation matrix to the corresponding covariance matrix given a vector of standard deviations.} } \author{ Maintainer: William Revelle \email{revelle@northwestern.edu } } \examples{ n <- 30 r <- seq(0,.9,.1) d <- r2d(r) rc <- matrix(r.con(r,n),ncol=2) t <- r*sqrt(n-2)/sqrt(1-r^2) p <- (1-pt(t,n-2))*2 r1 <- t2r(t,(n-2)) r2 <- d2r(d) chi <- r2chi(r,n) r3 <- chi2r(chi,n) r.rc <- data.frame(r=r,z=fisherz(r),lower=rc[,1],upper=rc[,2],t=t,p=p,d=d, chi2=chi,d2r=r2,t2r=r1,chi2r=r3) round(r.rc,2) } \keyword{ multivariate }% at least one, from doc/KEYWORDS \keyword{ models }% __ONLY ONE__ keyword per line psych/man/score.items.Rd0000644000176200001440000005571413721300567014715 0ustar liggesusers\name{scoreItems} \alias{scoreItems} \alias{scoreFast} \alias{scoreVeryFast} \alias{score.items} \alias{response.frequencies} \alias{responseFrequency} \title{ Score item composite scales and find Cronbach's alpha, Guttman lambda 6 and item whole correlations } \description{Given a data.frame or matrix of n items and N observations and a list of the direction to score them (a keys.list with k keys) find the sum scores or average scores for each person and each scale. In addition, report Cronbach's alpha, Guttman's Lambda 6, the average r, the scale intercorrelations, and the item by scale correlations (raw and corrected for item overlap). Replace missing values with the item median or mean if desired. Items may be keyed 1 (score it), -1 ) (reverse score it), or 0 (do not score it). Negatively keyed items will be reverse scored. Although prior versions used a keys matrix, it is now recommended to just use a list of scoring keys. See \code{\link{make.keys}} for a convenient way to make the keys file. If the input is a square matrix, then it is assumed that the input is a covariance or correlation matix and scores are not found, but the item statistics are reported. (Similar functionality to \code{\link{cluster.cor}}). \code{\link{response.frequencies}} reports the frequency of item endorsements fore each response category for polytomous or multiple choice items. \code{\link{scoreFast}} and \code{\link{scoreVeryFast}} just find sums/mean scores and do not report reliabilities. Much faster for large data sets. } \usage{ scoreItems(keys, items, totals = FALSE, ilabels = NULL,missing=TRUE, impute="median", delete=TRUE, min = NULL, max = NULL, digits = 2,n.obs=NULL,select=TRUE) score.items(keys, items, totals = FALSE, ilabels = NULL,missing=TRUE, impute="median", delete=TRUE, min = NULL, max = NULL, digits = 2,select=TRUE) scoreFast(keys, items, totals = FALSE, ilabels = NULL,missing=TRUE, impute="none", delete=TRUE, min = NULL, max = NULL,count.responses=FALSE, digits = 2) scoreVeryFast(keys,items,totals=FALSE, min=NULL,max=NULL,count.responses=FALSE) response.frequencies(items,max=10,uniqueitems=NULL) responseFrequency(items,max=10,uniqueitems=NULL) } \arguments{ \item{keys}{A list of scoring keys or a matrix or dataframe of -1, 0, or 1 weights for each item on each scale which may be created by hand, or by using \code{\link{make.keys}}. Just using a list of scoring keys (see example) is probably more convenient.} \item{items}{ Matrix or dataframe of raw item scores} \item{totals}{ if TRUE find total scores, if FALSE (default), find average scores } \item{ilabels}{ a vector of item labels. } \item{missing}{missing = TRUE is the normal case and data are imputed according to the impute option. missing=FALSE, only complete cases are scored.} \item{impute}{impute="median" replaces missing values with the item medians, impute = "mean" replaces values with the mean response. impute="none" the subject's scores are based upon the average of the keyed, but non missing scores. impute = "none" is probably more appropriate for a large number of missing cases (e.g., SAPA data). } \item{delete}{if delete=TRUE, automatically delete items with no variance (and issue a warning)} \item{min}{May be specified as minimum item score allowed, else will be calculated from data. min and max should be specified if items differ in their possible minima or maxima. See notes for details.} \item{max}{May be specified as maximum item score allowed, else will be calculated from data. Alternatively, in response frequencies, it is maximum number of alternative responses to count. } \item{uniqueitems}{If specified, the set of possible unique response categories} \item{digits}{ Number of digits to report for mean scores } \item{n.obs}{If scoring from a correlation matrix, specify the number of subjects allows for the calculation of the confidence intervals for alpha.} \item{select}{By default, just find the statistics of those items that are included in scoring keys. This allows scoring of data sets that have bad data for some items that are not included in the scoring keys. This also speeds up the scoring of small subsets of item from larger data sets.} \item{count.responses}{If TRUE, report the number of items/scale answered for each subject.} } \details{The process of finding sum or average scores for a set of scales given a larger set of items is a typical problem in applied psychometrics and in psychometric research. Although the structure of scales can be determined from the item intercorrelations, to find scale means, variances, and do further analyses, it is typical to find scores based upon the sum or the average item score. For some strange reason, personality scale scores are typically given as totals, but attitude scores as averages. The default for scoreItems is the average as it would seem to make more sense to report scale scores in the metric of the item. When scoring more than one scale, it is convenient to have a list of the items on each scale and the direction to score the items. This may be converted to a keys.matrix using \code{\link{make.keys}} or may be entered as a keys.list directly. Various estimates of scale reliability include ``Cronbach's alpha", Guttman's Lambda 6, and the average interitem correlation. For k = number of items in a scale, and av.r = average correlation between items in the scale, alpha = k * av.r/(1+ (k-1)*av.r). Thus, alpha is an increasing function of test length as well as the test homeogeneity. Surprisingly, more than a century after Spearman (1904) introduced the concept of reliability to psychologists, there are still multiple approaches for measuring it. Although very popular, Cronbach's \eqn{\alpha} (1951) underestimates the reliability of a test and over estimates the first factor saturation. \eqn{\alpha}{alpha} (Cronbach, 1951) is the same as Guttman's \eqn{\lambda_3}{lambda3} (Guttman, 1945) and may be found by \deqn{ \lambda_3 = \frac{n}{n-1}\Bigl(1 - \frac{tr(\vec{V})_x}{V_x}\Bigr) = \frac{n}{n-1} \frac{V_x - tr(\vec{V}_x)}{V_x} = \alpha }{Lambda 3 = (n)/(n-1)(1-tr(Vx)/(Vx) = (n)/(n-1)(Vx-tr(Vx)/Vx = alpha} Perhaps because it is so easy to calculate and is available in most commercial programs, alpha is without doubt the most frequently reported measure of internal consistency reliability. Alpha is the mean of all possible spit half reliabilities (corrected for test length). For a unifactorial test, it is a reasonable estimate of the first factor saturation, although if the test has any microstructure (i.e., if it is ``lumpy") coefficients \eqn{\beta}{beta} (Revelle, 1979; see \code{\link{ICLUST}}) and \eqn{\omega_h}{omega_hierchical} (see \code{\link{omega}}) (McDonald, 1999; Revelle and Zinbarg, 2009) are more appropriate estimates of the general factor saturation. \eqn{\omega_t}{omega_total} (see \code{\link{omega}}) is a better estimate of the reliability of the total test. Guttman's Lambda 6 (G6) considers the amount of variance in each item that can be accounted for the linear regression of all of the other items (the squared multiple correlation or smc), or more precisely, the variance of the errors, \eqn{e_j^2}, and is \deqn{ \lambda_6 = 1 - \frac{\sum e_j^2}{V_x} = 1 - \frac{\sum(1-r_{smc}^2)}{V_x} .}{lamada 6 = 1 - sum(e^2)/Vx = 1-sum(1-r^2(smc))/Vx.} The squared multiple correlation is a lower bound for the item communality and as the number of items increases, becomes a better estimate. G6 is also sensitive to lumpyness in the test and should not be taken as a measure of unifactorial structure. For lumpy tests, it will be greater than alpha. For tests with equal item loadings, alpha > G6, but if the loadings are unequal or if there is a general factor, G6 > alpha. Although it is normal when scoring just a single scale to calculate G6 from just those items within the scale, logically it is appropriate to estimate an item reliability from all items available. This is done here and is labeled as G6* to identify the subtle difference. Alpha and G6* are both positive functions of the number of items in a test as well as the average intercorrelation of the items in the test. When calculated from the item variances and total test variance, as is done here, raw alpha is sensitive to differences in the item variances. Standardized alpha is based upon the correlations rather than the covariances. alpha is a generalization of an earlier estimate of reliability for tests with dichotomous items developed by Kuder and Richardson, known as KR20, and a shortcut approximation, KR21. (See Revelle, in prep; Revelle and Condon, in press.). A useful index is the ratio of reliable variance to unreliable variance and is known as the Signal/Noise ratio. This is just \deqn{s/n = \frac{n \bar{r}}{1-n \bar{r}}}{s/n = n r/(1-nr)} (Cronbach and Gleser, 1964; Revelle and Condon (in press)). Standard errors for unstandardized alpha are reported using the formula from Duhachek and Iacobucci (2005). More complete reliability analyses of a single scale can be done using the \code{\link{omega}} function which finds \eqn{\omega_h}{omega_hierchical} and \eqn{\omega_t}{omega_total} based upon a hierarchical factor analysis. Alternative estimates of the Greatest Lower Bound for the reliability are found in the \code{\link{guttman}} function. Alpha is a poor estimate of the general factor saturation of a test (see Revelle and Zinbarg, 2009; Zinbarg et al., 2005) for it can seriously overestimate the size of a general factor, and a better but not perfect estimate of total test reliability because it underestimates total reliability. None the less, it is a common statistic to report. In general, the use of alpha should be discouraged and the use of more appropriate estimates (\eqn{\omega_h}{omega_hierchical} and \eqn{\omega_t}{omega_total}) should be encouraged. Correlations between scales are attenuated by a lack of reliability. Correcting correlations for reliability (by dividing by the square roots of the reliabilities of each scale) sometimes help show structure. This is done in the scale intercorrelation matrix with raw correlations below the diagonal and unattenuated correlation above the diagonal. There are several alternative ways to treat missing values. By default, missing values are replaced with the corresponding median value for that item. Means can be used instead (impute="mean"), or subjects with missing data can just be dropped (missing = FALSE). For data with a great deal of missingness, yet another option is to just find the average of the available responses (impute="none"). This is useful for findings means for scales for the SAPA project (see \url{https://www.sapa-project.org/}) where most scales are estimated from random sub samples of the items from the scale. In this case, the alpha reliabilities are seriously overinflated because they are based upon the total number of items in each scale. The "alpha observed" values are based upon the average number of items answered in each scale using the standard form for alpha a function of inter-item correlation and number of items. Using the impute="none" option as well as asking for totals (totals="TRUE") will be done, although a warning will be issued because scores will now reflect the number of items responded to much more than the actual pattern of responses. The number of missing responses for each person for each scale is reported in the missing object. One possibility is to drop scores just for those scales with missing responses. This may be done adding the code: scores$scores[scores$missing >0] <- NA This is shown in the last example. Note that the default for scoreItems is to impute missing items with their median, but the default for scoreFAst is to not impute but must return the scale scores based upon the mean or total value for the items scored. By default, \code{\link{scoreItems}} will drop those items with no variance. This changes the reliability calculations (number of items is reduced), and it mean that those items are not used in finding scores. Using the delete=FALSE option, these variables will not be dropped and their scores will be found. Various warnings are issued about the SMC not being correct. What do you you expect when there is no variance for an item? \code{\link{scoreItems}} can be applied to correlation matrices to find just the reliability statistics. This will be done automatically if the items matrix is symmetric. \code{\link{scoreFast}} just finds the scores (with or without imputation) and does not report other statistics. It is much faster! \code{\link{scoreVeryFast}} is even more stripped down, no imputation, just scores based upon the observed data. No statistics. } \value{ \item{scores }{Sum or average scores for each subject on the k scales} \item{alpha }{Cronbach's coefficient alpha. A simple (but non-optimal) measure of the internal consistency of a test. See also beta and omega. Set to 1 for scales of length 1. } \item{av.r}{The average correlation within a scale, also known as alpha 1, is a useful index of the internal consistency of a domain. Set to 1 for scales with 1 item.} \item{G6}{Guttman's Lambda 6 measure of reliability} \item{G6*}{A generalization of Guttman's Lambda 6 measure of reliability using all the items to find the smc.} \item{n.items}{Number of items on each scale} \item{item.cor}{The correlation of each item with each scale. Because this is not corrected for item overlap, it will overestimate the amount that an item correlates with the other items in a scale.} \item{cor}{The intercorrelation of all the scales based upon the interitem correlations (see note for why these differ from the correlations of the observed scales themselves).} \item{corrected}{The correlations of all scales (below the diagonal), alpha on the diagonal, and the unattenuated correlations (above the diagonal)} \item{item.corrected}{The item by scale correlations for each item, corrected for item overlap by replacing the item variance with the smc for that item} \item{response.freq}{The response frequency (based upon number of non-missing responses) for each alternative.} \item{missing}{How many items were not answered for each scale } \item{num.ob.item}{The average number of items with responses on a scale. Used in calculating the alpha.observed-- relevant for SAPA type data structures.} } \note{It is important to recognize in the case of massively missing data (e.g., data from a Synthetic Aperture Personality Assessment (\url{https://www.sapa-project.org/}) or the International Cognitive Ability Resources (\url{https://icar-project.org})) study where perhaps only 10-50\% of the items per scale are given to any one subject)) that the number of items per scale, and hence standardized alpha, is not the nominal value and hence alpha of the observed scales will be overestimated. For this case (impute="none"), an additional alpha (alpha.ob) is reported. More importantly in this case of massively missing data, there is a difference between the correlations of the composite scales based upon the correlations of the items and the correlations of the scored scales based upon the observed data. That is, the cor object will have correlations as if all items had been given, while the correlation of the scores object will reflect the actual correlation of the scores. For SAPA data, it is recommended to use the cor object. Confidence of these correlations may be found using the \code{\link{cor.ci}} function. Further note that the inter-scale correlations are based upon the correlations of scales formed from the covariance matrix of the items. This will differ from the correlation of scales based upon the correlation of the items. Thus, although \code{\link{scoreItems}} will produce reliabilities and intercorrelations from either the raw data or from a correlation matrix, these values will differ slightly. In addition, with a great deal of missing data, the scale intercorrelations will differ from the correlations of the scores produced, for the latter will be attenuated. An alternative to classical test theory scoring is to use \code{\link{scoreIrt}} to find score estimates based upon Item Response Theory. This is particularly useful in the case of SAPA data which tend to be massively missing. It is also useful to find scores based upon polytomous items following a factor analysis of the polychoric correlation matrix (see \code{\link{irt.fa}}). However, remember that this only makes sense if the items are unidimensional. That is to say, if forming item composites from (e.g., \code{\link{bestScales}}), that are empirically derived, they will necessarily have a clear factor structure and the IRT based scoring does not make sense. When reverse scoring items from a set where items differ in their possible minima or maxima, it is important to specify the min and max values. Items are reversed by subtracting them from max + min. Thus, if items range from 1 to 6, items are reversed by subtracting them from 7. But, if the data set includes other variables, (say an id field) that far exceeds the item min or max, then the max id will incorrectly be used to reverse key. min and max can either be single values, or vectors for all items. Compare two examples of scoring the \code{\link[psychTools]{bfi}} data set. If scales are formed with overlapping items, then the correlations of the scales will be seriously inflated. \code{\link{scoreOverlap}} will adjust the correlations for this overlap. Yet another possibility for scoring large data sets is to ignore all the reliability calculations and just find the scores. This may be done using \code{\link{scoreFast}} or \code{\link{scoreVeryFast}}. These two functions just find mean scores (\code{\link{scoreVeryFast}} without imputation) or will do imputation if desired \code{\link{scoreFast}}. For 200K cases on 1000 variables with 11 scales, \code{\link{scoreVeryFast}} took 4.7 seconds on a Mac PowerBook with a 2.8GHZ Intel I7 and \code{\link{scoreFast}} took 23.2 seconds. \code{\link{scoreIrt.1pl}} for the same problem took xxx with options("mc.cores"=1) (not parallel processing) and 1259 seconds with options("mc.cores"=NULL) (implying 2 cores) and with four cores was very slow (probably too much parallel processing). Yet one more possibility is to find scores based upon a matrix of weights (e.g. zero order correlations, beta weights, or factor weights.) In this case, scores are simply the product of the weights times the (standardized) items. If using coefficients from a regression analysis (lm), a column of 1's is added to the data and labeled "(Intercept)" and this is used in the calculation. This is done by \code{\link{scoreWtd}}. } \references{ Cronbach, L.J. and Gleser G.C. (1964)The signal/noise ratio in the comparison of reliability coefficients. Educational and Psychological Measurement, 24 (3) 467-480. Duhachek, A. and Iacobucci, D. (2004). Alpha's standard error (ase): An accurate and precise confidence interval estimate. Journal of Applied Psychology, 89(5):792-808. McDonald, R. P. (1999). Test theory: A unified treatment. L. Erlbaum Associates, Mahwah, N.J. Revelle, W. (in preparation) An introduction to psychometric theory with applications in R. \url{https://personality-project.org/r/book/} Revelle, W. and Condon, D.C. Reliability. In Irwing, P., Booth, T. and Hughes, D. (Eds). the Wiley-Blackwell Handbook of Psychometric Testing (in press). Revelle W. and R.E. Zinbarg. (2009) Coefficients alpha, beta, omega and the glb: comments on Sijtsma. Psychometrika, 74(1):145-154. Zinbarg, R. E., Revelle, W., Yovel, I. and Li, W. (2005) Cronbach's alpha, Revelle's beta, and McDonald's omega h, Their relations with each other and two alternative conceptualizations of reliability, Psychometrika, 70, 123-133. } \author{ William Revelle } \seealso{\code{\link{make.keys}} for a convenient way to create the keys file, \code{\link{score.multiple.choice}} for multiple choice items, \cr \code{\link{alpha}}, \code{\link{correct.cor}}, \code{\link{cluster.cor}} , \code{\link{cluster.loadings}}, \code{\link{omega}}, \code{\link{guttman}} for item/scale analysis. If scales are formed from overlapping sets of items, their correlations will be inflated. This is corrected for when using the \code{\link{scoreOverlap}} function which, although it will not produce scores, will report scale intercorrelations corrected for item overlap. In addition, the \code{\link{irt.fa}} function provides an alternative way of examining the structure of a test and emphasizes item response theory approaches to the information returned by each item and the total test. Associated with these IRT parameters is the \code{\link{scoreIrt}} function for finding IRT based scores as well as \code{\link{irt.responses}} to show response curves for the alternatives in a multiple choice test. \code{\link{scoreIrt}} will find both IRT based estimates as well as average item response scores. These latter correlate perfectly with those found by scoreItems. If using a keys matrix, the score.irt results are based upon the item difficulties with the assumption that all items are equally discriminating (effectively a Rasch model). These scores are probably most useful in the case of massively missing data because they can take into account the item difficulties. \code{\link{scoreIrt.1pl}} finds the item difficulty parameters and then applies a 1 parameter (Rasch like) model. It chooses items based upon a keys.list. } \examples{ #see the example including the bfi data set data(psychTools::bfi) keys.list <- list(agree=c("-A1","A2","A3","A4","A5"), conscientious=c("C1","C2","C3","-C4","-C5"),extraversion=c("-E1","-E2","E3","E4","E5"), neuroticism=c("N1","N2","N3","N4","N5"), openness = c("O1","-O2","O3","O4","-O5")) keys <- make.keys(psychTools::bfi,keys.list) #no longer necessary scores <- scoreItems(keys,psychTools::bfi,min=1,max=6) #using a keys matrix scores <- scoreItems(keys.list,psychTools::bfi,min=1,max=6) # or just use the keys.list summary(scores) #to get the response frequencies, we need to not use the age variable scores <- scoreItems(keys[1:25,],psychTools::bfi[1:25]) #we do not need to specify min or #max if there are no values (such as age) outside the normal item range. scores #The scores themselves are available in the scores$scores object. I.e., describe(scores$scores) #compare this output to that for the impute="none" option for SAPA type data #first make many of the items missing in a missing pattern way missing.bfi <- psychTools::bfi missing.bfi[1:1000,3:8] <- NA missing.bfi[1001:2000,c(1:2,9:10)] <- NA scores <- scoreItems(keys.list,missing.bfi,impute="none",min=1,max=6) scores describe(scores$scores) #the actual scores themselves #If we want to delete scales scores for people who did not answer some items for one #(or more) scales, we can do the following: scores <- scoreItems(keys.list,missing.bfi,totals=TRUE,min=1,max=6) #find total scores describe(scores$scores) #note that missing data were replaced with median for the item scores$scores[scores$missing > 0] <- NA #get rid of cases with missing data describe(scores$scores) } \keyword{ multivariate } \keyword{models} psych/man/sim.VSS.Rd0000644000176200001440000000252111222300725013676 0ustar liggesusers\name{sim.VSS} \alias{sim.VSS} \alias{VSS.simulate} \alias{VSS.sim} \title{ create VSS like data} \description{Simulation is one of most useful techniques in statistics and psychometrics. Here we simulate a correlation matrix with a simple structure composed of a specified number of factors. Each item is assumed to have complexity one. See \code{\link{circ.sim}} and \code{\link{item.sim}} for alternative simulations. } \usage{ sim.VSS(ncases=1000, nvariables=16, nfactors=4, meanloading=.5,dichot=FALSE,cut=0) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{ncases}{ number of simulated subjects } \item{nvariables}{ Number of variables } \item{nfactors}{ Number of factors to generate } \item{meanloading}{with a mean loading } \item{dichot}{dichot=FALSE give continuous variables, dichot=TRUE gives dichotomous variables} \item{cut}{if dichotomous = TRUE, then items with values > cut are assigned 1, otherwise 0.} } \value{a ncases x nvariables matrix } \author{ William Revelle} \seealso{ \code{\link{VSS}}, \code{\link{ICLUST}} } \examples{ \dontrun{ simulated <- sim.VSS(1000,20,4,.6) vss <- VSS(simulated,rotate="varimax") VSS.plot(vss) } } \keyword{ multivariate }% at least one, from doc/KEYWORDS \keyword{ models }% __ONLY ONE__ keyword per line \keyword{ datagen }% __ONLY ONE__ keyword per line psych/man/harmonic.mean.Rd0000644000176200001440000000247313056557227015203 0ustar liggesusers\name{harmonic.mean} \alias{harmonic.mean} \title{ Find the harmonic mean of a vector, matrix, or columns of a data.frame} \description{The harmonic mean is merely the reciprocal of the arithmetic mean of the reciprocals. } \usage{ harmonic.mean(x,na.rm=TRUE,zero=TRUE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{ a vector, matrix, or data.frame } \item{na.rm}{na.rm=TRUE remove NA values before processing} \item{zero}{If TRUE, then if there are any zeros, return 0, else, return the harmonic mean of the non-zero elements} } \details{Included as an example for teaching about functions. As well as for a discussion of how to estimate central tendencies. Also used in \code{\link{statsBy}} to weight by the harmonic mean. Values of 0 can be included (in which case the harmonic.mean = 0) or converted to NA according to the zero option. Added the zero option, March, 2017. } \value{ The harmonic mean(s) } \note{Included as a simple demonstration of how to write a function} \examples{ x <- seq(1,5) x2 <- x^2 x2[2] <- NA y <- x - 1 X <- data.frame(x,x2,y) harmonic.mean(x) harmonic.mean(x2) harmonic.mean(X) harmonic.mean(X,na.rm=FALSE) harmonic.mean(X,zero=FALSE) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{ multivariate } psych/man/spider.Rd0000644000176200001440000001433314105041246013731 0ustar liggesusers\name{spider} \alias{spider} \alias{radar} \title{Make "radar" or "spider" plots.} \description{ Radar plots and spider plots are just two of the many ways to show multivariate data. \code{\link{radar}} plots correlations as vectors ranging in length from 0 (corresponding to r=-1) to 1 (corresponding to an r=1). The vectors are arranged radially around a circle. Spider plots connect the end points of each vector. The plots are most appropriate if the variables are organized in some meaningful manner. } \usage{ spider(y,x,data,labels=NULL,rescale=FALSE,center=FALSE,connect=TRUE,overlay=FALSE, scale=1,ncolors=31,fill=FALSE,main=NULL,...) radar(x,labels=NULL,keys=NULL,center=FALSE,connect=FALSE,scale=1,ncolors=31,fill=FALSE, add=FALSE,linetyp="solid", main="Radar Plot",angle=0,absolute=FALSE, show=TRUE,digits=2,cut=.2,circles=TRUE, shape=FALSE, clockwise=FALSE, delta = NULL,label.pos=NULL,position=NULL, xlim=c(-1,1),ylim=c(-1, 1),...) } \arguments{ \item{y}{The y variables to plot. Each y is plotted against all the x variables} \item{x}{The x variables defining each line. Each y is plotted against all the x variables} \item{data}{A correlation matrix from which the x and y variables are selected} \item{labels}{Labels (assumed to be colnames of the data matrix) for each x variable} \item{rescale}{If TRUE, then rescale the data to have mean 0 and sd = 1. This is used if plotting raw data rather than correlations.} \item{center}{if TRUE, then lines originate at the center of the plot, otherwise they start at the mid point.} \item{connect}{if TRUE, a spider plot is drawn, if FALSE, just a radar plot} \item{scale}{can be used to magnify the plot, to make small values appear larger.} \item{ncolors}{if ncolors > 2, then positive correlations are plotted with shades of blue and negative correlations shades of red. This is particularly useful if fill is TRUE. ncolors should be an odd number, so that neutral values are coded as white. } \item{fill}{if TRUE, fill the polygons with colors scaled to size of correlation} \item{overlay}{If TRUE, plot multiple spiders on one plot, otherwise plot them as separate plots} \item{add}{If TRUE, add a new spider diagram to the previous one.} \item{linetyp}{see lty in the par options} \item{main}{A label or set of labels for the plots} \item{keys}{If a keys list is provided, then variables are grouped by the keys, with labels drawn for the key names} \item{angle}{Rotate the entire figure angle/nvar to the left. Useful for drawing circumplex structures} \item{absolute}{If TRUE, then just use color to show correlation size} \item{show}{If TRUE, show the values at the end of the radar lines if they are > cut} \item{digits}{round the values to digits} \item{cut}{Just show values > cut} \item{circles}{Draw circles at .25, .5 and .75} \item{shape}{If TRUE, do not draw circles, but rather polygons with nvar sides} \item{clockwise}{If TRUE, organize the variables clockwise} \item{delta}{How far from the ends of the lines should the values be placed (defaults to 1.05 * length of line). May be vector.} \item{label.pos}{How far out should the labels be placed? (defaults to 1.05 which is just outside of the outer circle.)} \item{position}{A way of passing the pos parameter that includes NULL as a value. (See pos in graphics help) } \item{xlim}{default values may be changed for more space for labels} \item{ylim}{default values by be changed for more space for labelssap} \item{\dots}{Additional parameters can be passed to the underlying graphics call} } \details{Displaying multivariate profiles may be done by a series of lines (see, e.g., matplot), by colors (see, e.g., \code{\link{corPlot}}, or by radar or spider plots. Spiders are particularly suitable for showing data thought to have circumplex structure. To show just one variable as a function of several others, use \code{\link{radar}}. To make multiple plots, use \code{\link{spider}}. An additional option when comparing just a few y values is to do overlay plots. Alternatively, set the plotting options to do several on one page. } \value{Either a spider or radar plot} \author{William Revelle} \seealso{\code{\link{corPlot}} } \examples{ op <- par(mfrow=c(3,2)) spider(y=1,x=2:9,data=Thurstone,connect=FALSE) #a radar plot spider(y=1,x=2:9,data=Thurstone) #same plot as a spider plot spider(y=1:3,x=4:9,data=Thurstone,overlay=TRUE) #make a somewhat oversized plot spider(y=26:28,x=1:25,data=cor(psychTools::bfi,use="pairwise"),fill=TRUE,scale=2) par(op) #another example taken from Lippa (2001, page 193) lippa.df <- structure(list(labels = c("Assured - Dominant", "Gregarious\nExtraverted", "Warm\nAgreeable", "Unassuming\nIngeneous", "Unassured - Submissive", "Aloof\nIntroverted", "Cold\nHearted", "Arrogant\nCalculating" ), pos = c(0.8, 0.85, 0.83, 0.8, 0.75, 0.83, 0.85, 0.85), values = c(0.41, -0.29, -0.53, -0.61, -0.38, 0.14, 0.59, 0.6), delta = c(1.1, 1.2, 1.2, 1.1, 1.1, 1.5, 1.2, 1.1)), row.names = c(NA, -8L), class = "data.frame") radar(lippa.df$values,abs=TRUE,labels=lippa.df$labels,angle=90,clockwise=TRUE,lwd=3, label.pos=lippa.df$pos,main="Data from Lippa (2001)",scale=.9,circles=FALSE, cut=0,delta=lippa.df$delta) segments(-1,0,1,0,lwd=.2) # Add hairline axes segments(0,-1,0,1,lwd=.2) text(0,1.05,expression(italic("Masculine Instrumentality"))) text(1.05,0,expression(italic("Feminine Communion")),srt=270) #show how to draw a hexagon RIASEC.df <- structure(list(labels = c("Realistic", "Investigative", "Artistic", "Social", "Enterprising", "Conventional"), Su = c(0.84, 0.26, -0.35, -0.68, 0.04, -0.33), Morris = c(1.14, 0.32, -0.19, -0.38, 0.22, 0.23)), row.names = c(NA, -6L), class = "data.frame") radar(RIASEC.df$Morris,RIASEC.df$labels,clockwise=TRUE,angle=0,absolute=TRUE,circl=FALSE,scale=.7, position=c(1,0,0,0,0,0), lwd=4,label.pos=rep(.80,6),main="",cut=0, shape=TRUE, delta =c(1.1,1.25,1.25, 1.25, 1.45,1.45) ) text(-1.04,0,expression(italic("People")),srt=90) text(1.04,0,expression(italic("Things")),srt=270) text(0,.91,expression(italic("Data"))) text(0,-.91 ,expression(italic("Ideas"))) segments(-1,0,1,0,lwd=.2) #add hairline axes segments(0,-.86,0,.86,lwd=.2) text(0,1.2, "Data from Su") } \keyword{ multivariate } \keyword{ hplot } psych/man/ICLUST.cluster.Rd0000644000176200001440000000260013256544641015135 0ustar liggesusers\name{ICLUST.cluster} \alias{ICLUST.cluster} \title{Function to form hierarchical cluster analysis of items } \description{ The guts of the \code{\link{ICLUST}} algorithm. Called by \code{\link{ICLUST}} See ICLUST for description. } \usage{ ICLUST.cluster(r.mat, ICLUST.options,smc.items) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{r.mat}{ A correlation matrix} \item{ICLUST.options}{ A list of options (see \code{\link{ICLUST}}) } \item{smc.items}{passed from the main program to speed up processing} } \details{ See \code{\link{ICLUST}} } \value{ A list of cluster statistics, described more fully in \code{\link{ICLUST}} \item{comp1 }{Description of 'comp1'} \item{comp2 }{Description of 'comp2'} ... } \references{Revelle, W. 1979, Hierarchical Cluster Analysis and the Internal Structure of Tests. Multivariate Behavioral Research, 14, 57-74. \url{https://personality-project.org/revelle/publications/iclust.pdf} \cr See also more extensive documentation at \url{https://personality-project.org/r/r.ICLUST.html} } \author{William Revelle } \note{ Although the main code for ICLUST is here in ICLUST.cluster, the more extensive documentation is for \code{\link{ICLUST}}. } \seealso{ \code{\link{ICLUST.graph}},\code{\link{ICLUST}}, \code{\link{cluster.fit} }, \code{\link{VSS}}, \code{\link{omega} }} \keyword{multivariate } \keyword{cluster } psych/man/ICLUST.Rd0000644000176200001440000004331614346734507013471 0ustar liggesusers\name{iclust} \alias{ICLUST} \alias{iclust} \title{iclust: Item Cluster Analysis -- Hierarchical cluster analysis using psychometric principles } \description{A common data reduction technique is to cluster cases (subjects). Less common, but particularly useful in psychological research, is to cluster items (variables). This may be thought of as an alternative to factor analysis, based upon a much simpler model. The cluster model is that the correlations between variables reflect that each item loads on at most one cluster, and that items that load on those clusters correlate as a function of their respective loadings on that cluster and items that define different clusters correlate as a function of their respective cluster loadings and the intercluster correlations. Essentially, the cluster model is a Very Simple Structure factor model of complexity one (see \code{\link{VSS}}). This function applies the iclust algorithm to hierarchically cluster items to form composite scales. Clusters are combined if coefficients alpha and beta will increase in the new cluster. \eqn{\alpha}{alpha}, the mean split half correlation, and \eqn{\beta}{beta}, the worst split half correlation, are estimates of the reliability and general factor saturation of the test. (See also the \code{\link{omega}} function to estimate McDonald's coeffients \eqn{\omega_h}{omega hierarchical} and \eqn{\omega_t}{omega total}). Other reliability estimates are found in the \code{\link{reliability}} and \code{\link{splitHalf}}. } \usage{ iclust(r.mat, nclusters=0, alpha=3, beta=1, beta.size=4, alpha.size=3, correct=TRUE,correct.cluster=TRUE, reverse=TRUE, beta.min=.5, output=1, digits=2,labels=NULL,cut=0, n.iterations =0, title="ICLUST", plot=TRUE, weighted=TRUE,cor.gen=TRUE,SMC=TRUE,purify=TRUE,diagonal=FALSE) ICLUST(r.mat, nclusters=0, alpha=3, beta=1, beta.size=4, alpha.size=3, correct=TRUE,correct.cluster=TRUE, reverse=TRUE, beta.min=.5, output=1, digits=2,labels=NULL,cut=0,n.iterations = 0,title="ICLUST",plot=TRUE, weighted=TRUE,cor.gen=TRUE,SMC=TRUE,purify=TRUE,diagonal=FALSE) #iclust(r.mat) #use all defaults #iclust(r.mat,nclusters =3) #use all defaults and if possible stop at 3 clusters #ICLUST(r.mat, output =3) #long output shows clustering history #ICLUST(r.mat, n.iterations =3) #clean up solution by item reassignment } %- maybe also 'usage' for other objects documented here. \arguments{ \item{r.mat}{ A correlation matrix or data matrix/data.frame. (If r.mat is not square i.e, a correlation matrix, the data are correlated using pairwise deletion for Pearson correlations. } \item{nclusters}{Extract clusters until nclusters remain (default will extract until the other criteria are met or 1 cluster, whichever happens first). See the discussion below for alternative techniques for specifying the number of clusters. } \item{alpha}{ Apply the increase in alpha criterion (0) never or for (1) the smaller, 2) the average, or 3) the greater of the separate alphas. (default = 3) } \item{beta}{ Apply the increase in beta criterion (0) never or for (1) the smaller, 2) the average, or 3) the greater of the separate betas. (default =1) } \item{beta.size}{ Apply the beta criterion after clusters are of beta.size (default = 4)} \item{alpha.size}{ Apply the alpha criterion after clusters are of size alpha.size (default =3) } \item{correct}{ Correct correlations for reliability (default = TRUE) } \item{correct.cluster}{Correct cluster -sub cluster correlations for reliability of the sub cluster , default is TRUE))} \item{reverse}{Reverse negative keyed items (default = TRUE} \item{beta.min}{ Stop clustering if the beta is not greater than beta.min (default = .5) } \item{output}{ 1) short, 2) medium, 3 ) long output (default =1)} \item{labels}{vector of item content or labels. If NULL, then the colnames are used. If FALSE, then labels are V1 .. Vn} \item{cut}{sort cluster loadings > absolute(cut) (default = 0) } \item{n.iterations}{iterate the solution n.iterations times to "purify" the clusters (default = 0)} \item{digits}{ Precision of digits of output (default = 2) } \item{title}{ Title for this run } \item{plot}{Should ICLUST.diagram be called automatically for plotting (does not require Rgraphviz default=TRUE)} \item{weighted}{Weight the intercluster correlation by the size of the two clusters (TRUE) or do not weight them (FALSE)} \item{cor.gen}{When correlating clusters with subclusters, base the correlations on the general factor (default) or general + group (cor.gen=FALSE)} \item{SMC}{When estimating cluster-item correlations, use the smcs as the estimate of an item communality (SMC=TRUE) or use the maximum correlation (SMC=FALSE).} \item{purify}{Should clusters be defined as the original groupings (purify = FAlSE) or by the items with the highest loadings on those original clusters? (purify = TRUE) } \item{diagonal}{Should the diagonal be included in the fit statistics. The default is not to include it. Prior to 1.2.8, the diagonal was included.} } \details{ Extensive documentation and justification of the algorithm is available in the original MBR 1979 \url{https://personality-project.org/revelle/publications/iclust.pdf} paper. Further discussion of the algorithm and sample output is available on the personality-project.org web page: \url{https://personality-project.org/r/r.ICLUST.html} A common problem in the social sciences is to construct scales or composites of items to measure constructs of theoretical interest and practical importance. This process frequently involves administering a battery of items from which those that meet certain criteria are selected. These criteria might be rational, empirical,or factorial. A similar problem is to analyze the adequacy of scales that already have been formed and to decide whether the putative constructs are measured properly. Both of these problems have been discussed in numerous texts, as well as in myriad articles. Proponents of various methods have argued for the importance of face validity, discriminant validity, construct validity, factorial homogeneity, and theoretical importance. Revelle (1979) proposed that hierachical cluster analysis could be used to estimate a new coefficient (beta) that was an estimate of the general factor saturation of a test. More recently, Zinbarg, Revelle, Yovel and Li (2005) compared McDonald's Omega to Cronbach's alpha and Revelle's beta. They conclude that \eqn{\omega_h}{omega} hierarchical is the best estimate. An algorithm for estimating \code{\link{omega} } is available as part of the psych package. Revelle and Zinbarg (2009) discuss alpha, beta, and omega, as well as other estimates of reliability. The original ICLUST program was written in FORTRAN to run on CDC and IBM mainframes and was then modified to run in PC-DOS. The R version of iclust is a completely new version written for the psych package. A requested feature (not yet available) is to specify certain items as forming a cluster. That is, to do confirmatory cluster analysis. The program currently has three primary functions: cluster, loadings, and graphics. In June, 2009, the option of weighted versus unweighted beta was introduced. Unweighted beta calculates beta based upon the correlation between two clusters, corrected for test length using the Spearman-Brown prophecy formala, while weighted beta finds the average interitem correlation between the items within two clusters and then finds beta from this. That is, for two clusters A and B of size N and M with between average correlation rb, weighted beta is (N+M)^2 rb/(Va +Vb + 2Cab). Raw (unweighted) beta is 2rab/(1+rab) where rab = Cab/sqrt(VaVb). Weighted beta seems a more appropriate estimate and is now the default. Unweighted beta is still available for consistency with prior versions. Also modified in June, 2009 was the way of correcting for item overlap when calculating the cluster-subcluster correlations for the graphic output. This does not affect the final cluster solution, but does produce slightly different path values. In addition, there are two ways to solve for the cluster - subcluster correlation. Given the covariance between two clusters, Cab with average rab = Cab/(N*M), and cluster variances Va and Vb with Va = N + N*(N-1)*ra then the correlation of cluster A with the combined cluster AB is either a) ((N^2)ra + Cab)/sqrt(Vab*Va) (option cor.gen=TRUE) or b) (Va - N + Nra + Cab)/sqrt(Vab*Va) (option cor.gen=FALSE) The default is to use cor.gen=TRUE. Although iclust will give what it thinks is the best solution in terms of the number of clusters to extract, the user will sometimes disagree. To get more clusters than the default solution, just set the nclusters parameter to the number desired. However, to get fewer than meet the alpha and beta criteria, it is sometimes necessary to set alpha=0 and beta=0 and then set the nclusters to the desired number. Clustering 24 tests of mental ability A sample output using the 24 variable problem by Harman can be represented both graphically and in terms of the cluster order. The default is to produce graphics using the \code{\link{diagram}} functions. An alternative is to use the Rgraphviz package (from BioConductor). Because this package is sometimes hard to install, there is an alternative option (\code{\link{ICLUST.graph}} to write dot language instructions for subsequent processing. This will create a graphic instructions suitable for any viewing program that uses the dot language. \code{\link{ICLUST.rgraph}} produces the dot code for Graphviz. Somewhat lower resolution graphs with fewer options are available in the \code{\link{ICLUST.diagram}} function which does not require Rgraphviz. Dot code can be viewed directly in Graphviz or can be tweaked using commercial software packages (e.g., OmniGraffle) Note that for the Harman 24 variable problem, with the default parameters, the data form one large cluster. (This is consistent with the Very Simple Structure (\code{\link{VSS}}) output as well, which shows a clear one factor solution for complexity 1 data.) An alternative solution is to ask for a somewhat more stringent set of criteria and require an increase in the size of beta for all clusters greater than 3 variables. This produces a 4 cluster solution. It is also possible to use the original parameter settings, but ask for a 4 cluster solution. At least for the Harman 24 mental ability measures, it is interesting to compare the cluster pattern matrix with the oblique rotation solution from a factor analysis. The factor congruence of a four factor oblique pattern solution with the four cluster solution is > .99 for three of the four clusters and > .97 for the fourth cluster. The cluster pattern matrix (returned as an invisible object in the output) In September, 2012, the fit statistics (pattern fit and cluster fit) were slightly modified to (by default) not consider the diagonal (diagonal=FALSE). Until then, the diagonal was included in the cluster fit statistics. The pattern fit is analogous to factor analysis and is based upon the model = P x Structure where Structure is Pattern * Phi. Then R* = R - model and fit is the ratio of sum(r*^2)/sum(r^2) for the off diagonal elements. The results are best visualized using \code{\link{ICLUST.graph}}, the results of which can be saved as a dot file for the Graphviz program. https://www.graphviz.org/. The \code{\link{iclust.diagram}} is called automatically to produce cluster diagrams. The resulting diagram is not quite as pretty as what can be achieved in dot code but is quite adequate if you don't want to use an external graphics program. With the installation of Rgraphviz, ICLUST can also provide cluster graphs. } \value{ \item{title }{Name of this analysis} \item{results}{A list containing the step by step cluster history, including which pair was grouped, what were the alpha and betas of the two groups and of the combined group. Note that the alpha values are ``standardized alphas'' based upon the correlation matrix, rather than the raw alphas that will come from \code{\link{scoreItems}} The print.psych and summary.psych functions will print out just the must important results.} \item{corrected}{The raw and corrected for alpha reliability cluster intercorrelations.} \item{clusters}{a matrix of -1, 0, and 1 values to define cluster membership.} \item{purified}{A list of the cluster definitions and cluster loadings of the purified solution. These are sorted by importance (the eigenvalues of the clusters). The cluster membership from the original (O) and purified (P) clusters are indicated along with the cluster structure matrix. These item loadings are the same as those found by the \code{\link{scoreItems}} function and are found by correcting the item-cluster correlation for item overlap by summing the item-cluster covariances with all except that item and then adding in the smc for that item. These resulting correlations are then corrected for scale reliability. To show just the most salient items, use the cutoff option in \code{\link{print.psych}} } \item{cluster.fit, structure.fit, pattern.fit}{There are a number of ways to evaluate how well any factor or cluster matrix reproduces the original matrix. Cluster fit considers how well the clusters fit if only correlations with clusters are considered. Structure fit evaluates R = CC' while pattern fit evaluate R = C inverse (phi) C' where C is the cluster loading matrix, and phi is the intercluster correlation matrix.} \item{pattern}{The pattern matrix loadings. Pattern is just C inverse (Phi). The pattern matrix is conceptually equivalent to that of a factor analysis, in that the pattern coefficients are b weights of the cluster to the variables, while the normal cluster loadings are correlations of the items with the cluster. The four cluster and four factor pattern matrices for the Harman problem are very similar.} \item{order}{This is a vector of the variable names in the order of the cluster diagram. This is useful as a tool for sorting correlations matrices. See the last example.} } \references{Revelle, W. Hierarchical Cluster Analysis and the Internal Structure of Tests. Multivariate Behavioral Research, 1979, 14, 57-74. Revelle, W. and Zinbarg, R. E. (2009) Coefficients alpha, beta, omega and the glb: comments on Sijtsma. Psychometrika, 2009. \url{https://personality-project.org/revelle/publications/iclust.pdf} \cr See also more extensive documentation at \url{https://personality-project.org/r/r.ICLUST.html} and \cr Revelle, W. (in prep) An introduction to psychometric theory with applications in R. To be published by Springer. (working draft available at \url{https://personality-project.org/r/book/} } \author{William Revelle } \note{iclust draws graphical displays with or without using Rgraphiviz. Because of difficulties installing Rgraphviz on many systems, the default it not even try using it. With the introduction of the \code{\link{diagram}} functions, iclust now draws using iclust.diagram which is not as pretty as using Rgraphviz, but more stable. However, Rgraphviz can be used by using \code{\link{ICLUST.rgraph}} to produces slightly better graphics. It is also possible to export dot code in the dot language for further massaging of the graphic. This may be done using \code{\link{ICLUST.graph}}. This last option is probably preferred for nice graphics which can be massaged in any dot code program (e.g., graphviz (https://graphviz.org) or a commercial program such as OmniGraffle. To view the cluster structure more closely, it is possible to save the graphic output as a pdf and then magnify this using a pdf viewer. This is useful when clustering a large number of variables. In order to sort the clusters by cluster loadings, use \code{\link{iclust.sort}}. By default, the correlations used for the similarity matrix are Pearson correlations. It is of coure possible to \code{\link{tetrachoric}} or \code{\link{polychoric}} to form the correlation matrix for later analysis. iclust can also be used to organize complex correlation matrices. Thus, by clustering the items in a correlation matrix, sorting that matrix by the cluster loadings using \code{\link{mat.sort}}, and then plotting with \code{\link{corPlot}}. See the penultimate example. An alterative way of ordering the variables is to use the order object which is just a vector of the variable names sorted by the way they appear in the tree diagram. (See the final example.) } \seealso{ \code{\link{iclust.sort}}, \code{\link{ICLUST.graph}}, \code{\link{ICLUST.cluster}}, \code{\link{cluster.fit} }, \code{\link{VSS}}, \code{\link{omega}} } \examples{ test.data <- Harman74.cor$cov ic.out <- iclust(test.data,title="ICLUST of the Harman data") summary(ic.out) #use all defaults and stop at 4 clusters ic.out4 <- iclust(test.data,nclusters =4,title="Force 4 clusters") summary(ic.out4) ic.out1 <- iclust(test.data,beta=3,beta.size=3) #use more stringent criteria ic.out #more complete output plot(ic.out4) #this shows the spatial representation #use a dot graphics viewer on the out.file #dot.graph <- ICLUST.graph(ic.out,out.file="test.ICLUST.graph.dot") #show the equivalent of a factor solution fa.diagram(ic.out4$pattern,Phi=ic.out4$Phi,main="Pattern taken from iclust") #demonstrating the use of labels using the bfi data set iclust(psychTools::bfi[,1:25], labels=psychTools::bfi.dictionary[1:25,2] , title="ICLUST of bfi data set") #organize a correlation matrix based upon cluster solution. R <- cor(psychTools::bfi, use="pairwise") ic <- iclust(R,plot=FALSE) #suppress the plot R.s <- mat.sort(R, ic) corPlot(R.s, main ="bfi sorted by iclust loadings") #compare with corPlot(R[ic$order,ic$order] ,main="bfi sorted by iclust order") } \keyword{multivariate}% at least one, from doc/KEYWORDS \keyword{cluster}% __ONLY ONE__ keyword per line psych/man/fparse.Rd0000644000176200001440000000240514526764056013742 0ustar liggesusers\name{fparse} \alias{fparse} \title{ Parse and exten formula input from a model and return the DV, IV, and associated terms. } \description{ Formula input from e.g., lm, may be extended to include mediators, quadratic and partial terms using a standard syntax. This is use by \code{\link{lmCor}} and \code{\link{mediate}}. } \usage{ fparse(expr) } \arguments{ \item{expr}{A legitimate expression in the form y ~ x1 , etc. (see details)} } \value{ \item{y}{A list of elements from the left side of the formula} \item{x}{A list of elements from the right side of the formula} \item{m}{A list of those elements of the formula included in ()} \item{prod}{A list of elements separated by a * sign} \item{ex}{A list of elements marked by I()} } \details{The basic formula input given as DV1 + DV2 ~ IV1 + IV2 + (IV3) + I(IV4^2) - IV5 will be parsed to return 2 DVs (1 and 2), two normal IVs (1 and 2), a mediator (IV3) a quadratic (IV4) and a variable to be partialed (IV5). See the various examples in \code{\link{lmCor}} and \code{\link{mediate}}. } \author{William Revelle } \examples{ fparse(DV ~ IV1 + IV2 * IV2*IV3 + (IV4) + I(IV5^2) ) #somewhat more complicated fparse(DV1 + DV2 ~ IV1 + IV2 + IV3*IV4 + I(IV5^2) + I(Iv6^2) + (IV7) + (IV8) - IV9) } \keyword{ utilities } psych/man/best.scales.Rd0000644000176200001440000004047514265334362014672 0ustar liggesusers\name{bestScales} \alias{bestItems} \alias{bestScales} \alias{BISCUIT} \alias{biscuit} \alias{BISCWIT} \alias{biscwit} \title{A bootstrap aggregation function for choosing most predictive unit weighted items} \description{\code{\link{bestScales}} forms scales from the items/scales most correlated with a particular criterion and then cross validates on a hold out sample using unit weighted scales. This may be repeated n.iter times using either basic bootstrap aggregation (bagging) techniques or K-fold cross validation. Thus, the technique is known as \code{\link{BISCUIT}} (Best Items Scales that are Cross validated, Unit weighted, Informative, and Transparent). Given a dictionary of item content, \code{\link{bestScales}} will sort by criteria correlations and display the item content. Options for bagging (bootstrap aggregation) are included. An alternative to unit weighting is to weight items by their zero order correlations (cross validated) with the criteria. This weighted version is called \code{\link{BISCWIT}} and is an optional output. } \usage{ bestScales(x,criteria,min.item=NULL,max.item=NULL, delta = 0, cut=.1, n.item =10, wtd.cut=0, wtd.n=10, n.iter =1, folds=1, p.keyed=.9, overlap=FALSE, dictionary=NULL, check=TRUE, impute="none",log.p=FALSE,digits=2) bestItems(x,criteria=1,cut=.1, n.item=10, abs=TRUE, dictionary=NULL,check=FALSE,digits=2,use="pairwise",method="pearson") } \arguments{ \item{x}{A data matrix or data frame depending upon the function.} \item{criteria}{Which variables (by name or location) should be the empirical target for bestScales and bestItems. May be a separate object. } \item{min.item}{Find unit weighted and correlation weighted scales from min.item to max.item} \item{max.item}{These are all summarized in the final.multi.valid object} \item{delta}{Return items where the predicted r + delta * se of r < max value} \item{cut}{Return all values in abs(x[,c1]) > cut.} \item{wtd.cut}{When finding the weighted scales, use all items with zero order correlations > wtd.cut} \item{wtd.n}{When finding the weighted scales, use the wtd.n items that are > than wtd.cut} \item{abs}{if TRUE, sort by absolute value in bestItems} \item{dictionary}{a data.frame with rownames corresponding to rownames in the f$loadings matrix or colnames of the data matrix or correlation matrix, and entries (may be multiple columns) of item content.} \item{check}{if TRUE, delete items with no variance} \item{n.item}{How many items make up an empirical scale, or (bestItems, show the best n.items) } \item{overlap}{Are the correlations with other criteria fair game for bestScales} \item{impute}{When finding the best scales, and thus the correlations with the criteria, how should we handle missing data? The default is to drop missing items. (That is to say, to use pairwise complete correlations.)} \item{n.iter}{How many times to perform a bootstrap estimate. Replicate the best item function n.iter times, sampling roughly 1-1/e of the cases each time, and validating on the remaining 1/e of the cases for each iteration.} \item{folds}{If folds > 1, this is k-folds validation. Note, set n.iter > 1 to do bootstrap aggregation, or set folds > 1 to do k-folds. } \item{p.keyed}{The proportion of replications needed to include items in the final best keys.} \item{log.p}{Select items based upon the log of the probability of the correlations. This will only have an effect if the number of pairwise cases differs drastically from pair to pair. } \item{digits}{round to digits when showing output.} \item{use}{How to handle missing data. Defaults to "pairwise"} \item{method}{Which correlation to find. Defaults to "pearson"} } \details{ There are a number of procedures that can be used for predicting criteria from a set of predictors. The generic term for this is "machine learning" or "statistical learning". The basic logic of these procedures is to find a set of items that best predict a criteria according to some fit statistic and then cross validate these items numerous times. "lasso" regression (least absolute shrinkage and selection) is one such example. \code{\link{bestScales}} differs from these procedures by unit weighting items chosen from their zero order correlations with the criteria rather than weighting the partial correlations ala regression. This is an admittedly simple procedure that takes into account the well known finding (Wilks, 1938; Wainer, 1976; Dawes, 1979; Waller, 2008) that although regression weights are optimal for any particular data set, unit weights are almost as good (fungible) and more robust across sample variation. Following some suggestions, we have added the ability to find scales where items are weighted by their zero order correlations with the criteria. This is effectively a comprimise between unit weighting and regression weights (where the weights are the zero order correlations times the inverse of the correlation matrix). This weighted version may be thought of as \code{\link{BISCWIT}} in contrast to the unit weighted version \code{\link{BISCUIT}}. To be comparable to other ML algorithms, we now consider multiple solutions (for number of items >= min.item to max.item). The final scale consists of the number items which maximize the validity or at least are within delta * standard error of r of the maximum. Thus, \code{\link{bestScales}} will find up to n.items per criterion that have absolute correlations with a criterion greater than cut. If the overlap option is FALSE (default) the other criteria are not used. This is an example of ``dust bowl empiricism" in that there is no latent construct being measured, just those items that most correlate with a set of criteria. The empirically identified items are then formed into scales (ignoring concepts of internal consistency) which are then correlated with the criteria. Clearly, \code{\link{bestScales}} is capitalizing on chance associations. Thus, we should validate the empirical scales by deriving them on a fraction of the total number of subjects, and cross validating on the remaining subjects. (This is known both as K-fold cross validation and bagging. Both may be done). If folds > 1, then a k-fold cross validation is done. This removes 1/k (a fold) from the sample for the derivation sample and validates on that remaining fold. This is done k-folds times. Traditional cross validation would thus be a k-fold with k =2. More modern applications seem to prefer k =10 to have 90\% derivation sample and a 10\% cross validation sample. The alternative, known as 'bagging' is to do a bootstrap sample (which because it is sampling with replacement will typically extract 1- 1/e = 63.2\% of the sample) for the derivation sample (the bag) and then validate it on the remaining 1/e of the sample (the out of bag). This is done n.iter times. This should be repeated multiple times (n.iter > 1, e.g. n.iter=1000) to get stable cross validations. One can compare the validity of these two approaches by trying each. The average predictability of the n.iter samples are shown as are the average validity of the cross validations. This can only be done if x is a data matrix/ data.frame, not a correlation matrix. For very large data sets (e.g., those from SAPA) these scales seem very stable. \code{\link{bestScales}} is effectively a straight forward application of 'bagging' (bootstrap aggregation) and machine learning as well as k-fold validation. The criteria can be the colnames of elements of x, or can be a separate data.frame. \code{\link{bestItems}} and \code{\link{lookup}} are simple helper functions to summarize correlation matrices or factor loading matrices. \code{\link{bestItems}} will sort the specified column (criteria) of x on the basis of the (absolute) value of the column. The return as a default is just the rowname of the variable with those absolute values > cut. If there is a dictionary of item content and item names, then include the contents as a two column matrix with rownames corresponding to the item name and then as many fields as desired for item content. (See the example dictionary \code{\link[psychTools]{bfi.dictionary}}). The derived model can be further validated against yet another hold out sample using the \code{\link{predict.psych}} function if given the best scale object and the new data set. } \value{ \code{\link{bestScales}} returns the correlation of the empirically constructed scale with each criteria and the items used in the scale. If a dictionary is specified, it also returns a list (value) that shows the item content. Also returns the keys.list so that scales can be found using \code{\link{cluster.cor}} or \code{\link{scoreItems}}. If using replications (bagging or kfold) then it also returns the best.keys, a list suitable for scoring. There are actually four keys lists reported. best.keys are all the items used to form unit weighted scales with the restriction of n.item. weights may be used in the \code{\link{scoreWtd}} function to find scales based upon the raw correlation weights. If the min.item and max.item options are used, then two more sets of weights are provided. optimal.keys are a subset of the best.keys, taking just those items that increase the cross validation values up to the delta * se of the maximum. This is a slightly more parsimonious set. optimal.weights is analogous to optimal keys, but for supplies weights for just those items that are used to predict cross validation values up to delta * se of the maximum. The best.keys object is a list of items (with keying information) that may be used in subsequent analyses. These ``best.keys" are formed into scale scores for the ``final.valid" object which reports how well the best.keys work on the entire sample. This is, of course, not cross validated. Further cross validation can be done using the \code{\link{predict.psych}} function. \item{scores}{ Are the unit weighted scores from the original items} \item{best.keys}{ A key list of those items that were used in the unit weighting.} \item{wtd.scores}{Are the zero-order correlation based scores.} \item{weights}{ the scoring weights used} \item{final.multi.valid}{An object with the unit weighted and correlation weighted correlations from low.step to high.step} The print and summary output list a number of summary statistics for each criteria. This is given for the default case (number of items fixed) and then if requested, the optimal values chosen from min.item to max.item: The default statistics: \describe{ \item{derivation mean}{Mean correlation of fixed length scale with the criteria, derivation sample} \item{derivation.sd}{The standard deviation of these estimates} \item{validation.m}{The mean cross validated correlations with the criteria} \item{validation.sd}{The standard deviations of these estimates} \item{final.valid}{The correlation of the pooled models with all the subjects} \item{final.wtd}{The correlation of the pooled weighted model with all subjects} \item{N.wtd}{Number of items used in the final weighted model} } The optimal number of items statistics: \describe{ \item{n}{The mean number of items meeting the criteria} \item{unit}{The mean derivation predictive valididy} \item{n.wtd}{the mean number of items used in the wtd scales} \item{wtd}{The mean derivation wtd correlaton} \item{valid.n}{the mean number of items in the cross validation sample} \item{valid.unit}{The mean cross validated unit weighted correlations} \item{valid.wtd.n}{The mean number of items used in the cross validated correlated weighed scale} \item{valid.wtd}{The mean cross validated weighted correlation with criteria} \item{n.final}{The optimal number of items on the final cross validation sample} \item{n.wtd.final}{The optimal number of weighted items on the final cross validation.} \item{derviation.mean}{} } \code{\link{bestItems}} returns a sorted list of factor loadings or correlations with the labels as provided in the dictionary. If given raw data, then the correlations with the criteria variables are found first according to "use" and "method". The stats object can be used to create \code{\link{error.dots}} plots to show the mean estimate and the standard error of the estimates. See the examples. The resulting best scales can be cross validated on a different hold out sample using \code{\link{crossValidation}}. See the last example. } \note{ Although \code{\link{bestScales}} was designed to form the best unit weighted scales, for large data sets, there seems to be some additional information in weighting by the average zero-order correlation. To create a dictionary, create an object with row names as the item numbers, and the columns as the item content. See the \code{link{bfi.dictionary}} as an example. This is a very computationally intensive function which can be speeded up considerably by using multiple cores and using the parallel package. The number of cores to use when doing bestScales may be specified using the options command. The greatest step in speed is going from 1 core to 2. This is about a 50% savings. Going to 4 cores seems to have about at 66% savings, and 8 a 75% savings. The number of parallel processes defaults to 2 but can be modified by using the options command: options("mc.cores"=4) will set the number of cores to 4. } \references{ Dawes, R.M. (1979) The robust beauty of improper linear models in decision making, American Psychologist, 34, 571-582. Elleman, L. G., McDougald, S. K., Condon, D. M., & Revelle, W. 2020 (in press). That takes the BISCUIT: A comparative study of predictive accuracy and parsimony of four statistical learning techniques in personality data, with data missingness conditions. European Journal of Psychological Assessment. (Preprint available at https://psyarxiv.com/tuqap/) Revelle, W. (in preparation) An introduction to psychometric theory with applications in {R}. Springer. (Available online at \url{https://personality-project.org/r/book/}). Wainer, H. (1979) Estimating coefficients in linear models: It don't make no nevermind. Psychological Buletin, 83, 213-217. Waller, N.G. (2008), Fungible weights in multiple regression. Psychometrica, 73, 691-703. Wilks, S. S. (1938), Weighting systems for linear functions of correlated variables when there is no dependent variable. Psychometrika. 3. 23-40. } \author{William Revelle} \note{Although empirical scale construction is appealing, it has the basic problem of capitalizing on chance. Thus, be careful of over interpreting the results unless working with large samples. Iteration and bootstrapping aggregation (bagging) gives information on the stability of the solutions. It is also important to realize that the variables that are most related to the criterion might be because of other, trivial reasons (e.g. height predicts gender) } \seealso{ \code{\link{fa}}, \code{\link{iclust}},\code{\link{principal}}, \code{\link{error.dots}} } \examples{ #This is an example of 'bagging' (bootstrap aggregation) #not run in order to pass the timing tests for Debian at CRAN #bestboot <- bestScales(psychTools::bfi,criteria=cs(gender,age,education), # n.iter=10,dictionary=psychTools::bfi.dictionary[1:3]) #bestboot #compare with 10 fold cross validation \donttest{#don't test for purposes of speed in installation to pass the debian CRAN test tenfold <- bestScales(psychTools::bfi,criteria=cs(gender,age,education),fold=10, dictionary= psychTools::bfi.dictionary[1:3]) tenfold #Then, to display the results graphically #Note that we scale the two graphs with the same x.lim values error.dots(tenfold,eyes=TRUE,pch=16,xlim=c(0,.4)) # error.dots(bestboot,add=TRUE,xlim=c(0,.4)) #do this again, but this time display the scale fits from 1 to 15 Items # tenfold <- bestScales(psychTools::bfi,criteria=cs(gender,age,education),fold=10, # dictionary= psychTools::bfi.dictionary[1:3],min.item=1,max.item=15) # matplot(tenfold$multi.validities$wtd.deriv,typ="b", # xlab="Number of Items",main="Fit as a function of number of items") #the wtd weights # matpoints(tenfold$multi.validities$unit.deriv,typ="b",lwd=2) #the unit weights #an example of using crossValidation with bestScales set.seed(42) ss <- sample(1:2800,1400) model <- bestScales(bfi[ss,],criteria=cs(gender,education,age)) original.fit <- crossValidation(model,bfi[ss,]) #the derivation set cross.fit <- crossValidation(model,bfi[-ss,]) #the cross validation set summary(original.fit) summary(cross.fit) } } \keyword{ models } \keyword{multivariate } \keyword{tree } psych/man/sim.multilevel.Rd0000644000176200001440000001462013256544675015437 0ustar liggesusers\name{sim.multilevel} \alias{sim.multilevel} \alias{sim.multi} \title{Simulate multilevel data with specified within group and between group correlations} \description{Multilevel data occur when observations are nested within groups. This can produce correlational structures that are sometimes difficult to understand. These two simulations allow for demonstrations that correlations within groups do not imply, nor are implied by, correlations between group means. The correlations of aggregated data is sometimes called an 'ecological correlation'. That group level and individual level correlations are independent makes such inferences problematic. Within individual data are simulated in sim.multi with a variety of possible within person structures. } \usage{ sim.multi(n.obs=4,nvar = 2, nfact=2, ntrials=96, days=16, mu=0,sigma=1, fact=NULL, loading=.9, phi=0,phi.i=NULL,beta.i=0,mu.i=0, sigma.i = 1,sin.i=0, cos.i=0, AR1=0, f.i=NULL, plot=TRUE) sim.multilevel(nvar = 9, ngroups = 4, ncases = 16, rwg, rbg, eta) } \arguments{ \item{n.obs}{How many subjects should be simulated. Four allows for nice graphics, use more to examine structural properties.} \item{nvar}{How many variables are to be simulated?} \item{nfact}{How many factors are simulated,defaults to 2} \item{ntrials}{How many observations per subject (across time)} \item{days}{How many days do these observations reflect? This is relevant if we are adding sine and cosines to the model to model diurnal rhythms.} \item{mu}{The grand mean for each variable across subjects} \item{sigma}{The between person standard deviation} \item{fact}{if NULL, a two factor model is created with loadings of loading or zero in a simple structure form} \item{loading}{If fact is NULL, then we create a factor model with loading or zeros} \item{phi}{The between person factor intercorrelation } \item{phi.i}{The within person factor intercorrelations} \item{beta.i}{Within subject rate of change over trials} \item{mu.i}{The within subject mean for each subject} \item{sigma.i}{the within subject standard deviation} \item{sin.i}{To what extent should we diurnally vary by subject?} \item{cos.i}{This will specify the within subject diurnal phase (lag)} \item{AR1}{Auto regressive value implies error at time t +1 is partly a function of error at time t. } \item{f.i}{Factor loadings for each subject} \item{plot}{If TRUE, create a lattice plot for each subject} \item{ngroups}{The number of groups to simulate} \item{ncases}{The number of simulated cases} \item{rwg}{The within group correlational structure} \item{rbg}{The between group correlational structure} \item{eta}{The correlation of the data with the within data} } \details{The basic concepts of the independence of within group and between group correlations is discussed very clearly by Pedhazur (1997) as well as by Bliese (2009). \code{\link{sim.multi}} generates within subject data to model the traditional two level structure of multilevel data. This is meant to show how within subject data measured over ntrials can vary independently within and between subjects. Furthermore, several variables can correlate within subjects show a person by person factor structure. Factor scores for n.obs subjects are created for nfact factors with loadings on nvar variables. A simple structure model is assumed, so that the loadings on nvar/fact are set to loading for each factor, the others are set to 0. Factors are allowed to correlate phi between subjects and phi.i for each subject. (Which can be different for each subject). Scores can change over time with a slope of beta.i and can vary diurnally as a function of sine and cosine of time (24 hours/day converted to radians). Error is added to every trial and can be related across trials with a lag of 1. Thus, if we set AR1=1, then the errors at time t = error + error at t -1. This will lead to auto correlations of about .5. (See \code{\link{autoR}} ). \code{\link{sim.multilevel}} merely simulates pooled correlations (mixtures of between group and within group correlations) to allow for a better understanding of the problems inherent in multi-level modeling. Data (wg) are created with a particular within group structure (rwg). Independent data (bg) are also created with a between group structure (rbg). Note that although there are ncases rows to this data matrix, there are only ngroups independent cases. That is, every ngroups case is a repeat. The resulting data frame (xy) is a weighted sum of the wg and bg. This is the inverse procedure for estimating estimating rwg and rbg from an observed rxy which is done by the \code{\link{statsBy}} function. \eqn{r_{xy} = \eta_{x_{within}} * \eta_{y_{within}} * r_{xy_{within}} + \eta_{x_{between}} * \eta_{y_{between}} * r_{xy_{between}} } } \value{ \item{x.df}{A data frame for further analysis using \code{\link{statsBy}} including nvar variable values for each of n.obs subjects (id) for ntrials. } \item{wg }{A matrix (ncases * nvar) of simulated within group scores} \item{bg}{A matrix (ncases * nvar) of simulated between group scores} \item{xy}{A matrix ncases * (nvar +1) of pooled data} } \references{ P. D. Bliese. Multilevel modeling in R (2.3) a brief introduction to R, the multilevel package and the nlme package, 2009. Pedhazur, E.J. (1997) Multiple regression in behavioral research: explanation and prediction. Harcourt Brace. Revelle, W. An introduction to psychometric theory with applications in R (in prep) Springer. Draft chapters available at \url{https://personality-project.org/r/book/} } \author{William Revelle} \seealso{\code{\link{statsBy}} for the decomposition of multi level data and \code{\link{withinBetween}} for an example data set. } \examples{ #First, show a few results from sim.multi x.df <- sim.multi() #the default is 4 subjects for two variables # over 16 days measured 6 times/day #sb <- statsBy(x.df,group ="id",cors=TRUE) #round(sb$within,2) #show the within subject correlations #get some parameters to simulate data(withinBetween) wb.stats <- statsBy(withinBetween,"Group") rwg <- wb.stats$rwg rbg <- wb.stats$rbg eta <- rep(.5,9) #simulate them. Try this again to see how it changes XY <- sim.multilevel(ncases=100,ngroups=10,rwg=rwg,rbg=rbg,eta=eta) lowerCor(XY$wg) #based upon 89 df lowerCor(XY$bg) #based upon 9 df -- } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{multivariate} \keyword{models} psych/man/cluster.cor.Rd0000644000176200001440000002426214550352232014714 0ustar liggesusers\name{scoreOverlap} \alias{cluster.cor} \alias{scoreOverlap} \alias{scoreBy} \title{Find correlations of composite variables (corrected for overlap) from a larger matrix.} \description{ Given a n x c cluster definition matrix of -1s, 0s, and 1s (the keys) , and a n x n correlation matrix, or an N x n data matrix, find the correlations of the composite clusters. The keys matrix can be entered by hand, copied from the clipboard (\code{\link[psychTools]{read.clipboard}}), or taken as output from the \code{\link{factor2cluster}} or \code{\link{make.keys}} functions. Similar functionality to \code{\link{scoreItems}} which also gives item by cluster correlations. \code{\link{scoreBy}} does this for individual subjects after a call to \code{\link{statsBy}}. } \usage{ scoreOverlap(keys, r, correct = TRUE, SMC = TRUE, av.r = TRUE, item.smc = NULL, impute = TRUE,select=TRUE, scores=FALSE, min=NULL,max=NULL) scoreBy(keys,stats, correct = TRUE, SMC = TRUE, av.r = TRUE, item.smc = NULL, impute = TRUE,select=TRUE,min.n=3,smooth=FALSE) cluster.cor(keys, r.mat, correct = TRUE,SMC=TRUE,item.smc=NULL,impute=TRUE) } \arguments{ \item{keys}{A list of scale/cluster keys, or a matrix of cluster keys } \item{r.mat}{A correlation matrix } \item{r}{Either a correlation matrix or a raw data matrix} \item{stats}{The output from statsBy. Needed for scoreBy.} \item{correct}{ TRUE shows both raw and corrected for attenuation correlations} \item{SMC}{Should squared multiple correlations be used as communality estimates for the correlation matrix? } \item{item.smc}{the smcs of the items may be passed into the function for speed, or calculated if SMC=TRUE } \item{impute}{if TRUE and scores=FALSE, impute missing scale correlations based upon the average interitem correlation, otherwise return NA. If scores=TRUE, impute missing data by means, medians, or none.} \item{av.r}{Should the average r be used in correcting for overlap? smcs otherwise.} \item{select}{By default, just find statistics for items included in the scoring keys. This allows for finding scores from matrices with bad items if they are not included in the set of scoring keys.} \item{min.n}{The minimum number of pairwise observations needed to define a correlation (in scoreBy)} \item{smooth}{If the matrices used in scoreBy are not positive semi-definite, should we smooth them?} \item{scores}{Find the raw average scores for each subject. Same functionality as \code{\link{scoreItems}}} \item{min}{May be specified as minimum item score allowed, else will be calculated from data. min and max should be specified if items differ in their possible minima or maxima. See notes for details.} \item{max}{May be specified as maximum item score allowed, else will be calculated from data. } } \details{These are three of the functions used in the SAPA (\url{https://www.sapa-project.org/}) procedures to form synthetic correlation matrices. Given any correlation matrix of items, it is easy to find the correlation matrix of scales made up of those items. This can also be done from the original data matrix or from the correlation matrix using \code{\link{scoreItems}} which is probably preferred unless the keys are overlapping. It is important to remember with SAPA data, that scale correlations should be found from the item correlations, not the raw data. In the case of overlapping keys, (items being scored on multiple scales), \code{\link{scoreOverlap}} will adjust for this overlap by replacing the overlapping covariances (which are variances when overlapping) with the corresponding best estimate of an item's ``true" variance using either the average correlation or the smc estimate for that item. This parallels the operation done when finding alpha reliability. This is similar to ideas suggested by Cureton (1966) and Bashaw and Anderson (1966) but uses the smc or the average interitem correlation (default). A typical use in the SAPA project is to form item composites by clustering or factoring (see \code{\link{fa}}, \code{\link{ICLUST}}, \code{\link{principal}}), extract the clusters from these results (\code{\link{factor2cluster}}), and then form the composite correlation matrix using \code{\link{cluster.cor}}. The variables in this reduced matrix may then be used in multiple correlatin procedures using \code{\link{mat.regress}}. The original correlation is pre and post multiplied by the (transpose) of the keys matrix. If some correlations are missing from the original matrix this will lead to missing values (NA) for scale intercorrelations based upon those lower level correlations. If impute=TRUE (the default), a warning is issued and the correlations are imputed based upon the average correlations of the non-missing elements of each scale. Because the alpha estimate of reliability is based upon the correlations of the items rather than upon the covariances, this estimate of alpha is sometimes called ``standardized alpha". If the raw items are available, it is useful to compare standardized alpha with the raw alpha found using \code{\link{scoreItems}}. They will differ substantially only if the items differ a great deal in their variances. The MIMS matrix reflects the average correlations of the Multiple Items within each of the Multiple Scales. This has been suggested as a measure of scale validity. These correlations are adjusted for item overlap. \code{\link{scoreOverlap}} answers an important question when developing scales and related subscales, or when comparing alternative versions of scales. For by removing the effect of item overlap, it gives a better estimate the relationship between the latent variables estimated by the observed sum (mean) scores. \code{\link{scoreBy}} finds the within subject correlations after preprocessing with \code{\link{statsBy}}. This is useful if doing an ESM study with multiple occasions for each subject. It also makes it possible to find the correlations for subsets of subjects. See the example. Note that it likely that for ESM data with a high level of missingness that the correlation matrices will not be positive-semi-definite. This can lead to composite score correlations that exceed 1. Smoothing will resolve this problem. \code{\link{scoreBy}} is useful when examining multi-level models where we want to examine the correlations within subjects (e.g., for ESM data) or within groups of subjects (when examining the stability of correlational structures by subgroups). For both cases the data must be processed first by \code{\link{statsBy}}. To find the variances of the scales it is necessary to use the cor="cov" option in \code{\link{statsBy}}. Starting in version 2.4.1, the scores option has been added. This will score the data (as does \code{\link{scoreItems}}) if it is not a correlation matrix but does not have all the options of \code{\link{scoreItems}}. Obviously, while the correlations can be corrected for item ovelap, the raw scores can not be so corrected. } \value{ \item{cor }{the (raw) correlation matrix of the clusters} \item{sd }{standard deviation of the cluster scores} \item{corrected }{raw correlations below the diagonal, alphas on diagonal, disattenuated above diagonal} \item{alpha}{The (standardized) alpha reliability of each scale.} \item{G6}{Guttman's Lambda 6 reliability estimate is based upon the smcs for each item in a scale. G6 uses the smc based upon the entire item domain.} \item{av.r}{The average inter item correlation within a scale} \item{size}{How many items are in each cluster?} \item{MIMS}{A matrix of the average correlations, corrected for overlap, between the various scales.} } \references{ Bashaw, W. and Anderson Jr, H. E. (1967). A correction for replicated error in correlation coefficients. Psychometrika, 32(4):435-441. Cureton, E. (1966). Corrected item-test correlations. Psychometrika, 31(1):93-96. } \author{ Maintainer: William Revelle \email{revelle@northwestern.edu} } \note{ See SAPA Revelle, W., Wilt, J., and Rosenthal, A. (2010) Personality and Cognition: The Personality-Cognition Link. In Gruszka, A. and Matthews, G. and Szymura, B. (Eds.) Handbook of Individual Differences in Cognition: Attention, Memory and Executive Control, Springer. The second example uses the \code{\link[psychTools]{msq}} data set of 72 measures of motivational state to examine the overlap between four lower level scales and two higher level scales. } \seealso{ \code{\link{factor2cluster}}, \code{\link{mat.regress}}, \code{\link{alpha}}, and most importantly, \code{\link{scoreItems}}, which will do all of what cluster.cor does for most users. cluster.cor is an important helper function for \code{\link{iclust}} } \examples{ #use the msq data set that shows the structure of energetic and tense arousal small.msq <- psychTools::msq[ c("active", "energetic", "vigorous", "wakeful", "wide.awake", "full.of.pep", "lively", "sleepy", "tired", "drowsy","intense", "jittery", "fearful", "tense", "clutched.up", "quiet", "still", "placid", "calm", "at.rest") ] small.R <- cor(small.msq,use="pairwise") keys.list <- list( EA = c("active", "energetic", "vigorous", "wakeful", "wide.awake", "full.of.pep", "lively", "-sleepy", "-tired", "-drowsy"), TA =c("intense", "jittery", "fearful", "tense", "clutched.up", "-quiet", "-still", "-placid", "-calm", "-at.rest") , high.EA = c("active", "energetic", "vigorous", "wakeful", "wide.awake", "full.of.pep", "lively"), low.EA =c("sleepy", "tired", "drowsy"), lowTA= c("quiet", "still", "placid", "calm", "at.rest"), highTA = c("intense", "jittery", "fearful", "tense", "clutched.up") ) keys <- make.keys(small.R,keys.list) adjusted.scales <- scoreOverlap(keys.list,small.R) #compare with unadjusted confounded.scales <- cluster.cor(keys,small.R) summary(adjusted.scales) #note that the EA and high and low EA and TA and high and low TA # scale correlations are confounded summary(confounded.scales) bfi.stats <- statsBy(bfi,group="education",cors=TRUE ,cor="cov") #specify to find covariances bfi.plus.keys <- c(bfi.keys,gender="gender",age ="age") bfi.by <- scoreBy(bfi.plus.keys,bfi.stats) bfi.by$var #to show the variances of each scale by groupl round(bfi.by$cor.mat,2) #the correlations by group bfi.by$alpha } \keyword{ multivariate } \keyword{ models } psych/man/Promax.Rd0000644000176200001440000002042013440257322013710 0ustar liggesusers\name{Promax} \alias{Promax} \alias{faRotate} \alias{Procrustes} \alias{TargetQ} \alias{TargetT} \alias{target.rot} \alias{bifactor} \alias{biquartimin} \alias{varimin} \alias{vgQ.bimin} \alias{vgQ.targetQ} \alias{vgQ.varimin} \alias{equamax} \title{ Perform Procustes,bifactor, promax or targeted rotations and return the inter factor angles.} \description{The bifactor rotation implements the rotation introduced by Jennrich and Bentler (2011) by calling GPForth in the GPArotation package. promax is an oblique rotation function introduced by Hendrickson and White (1964) and implemented in the promax function in the stats package. Unfortunately, promax does not report the inter factor correlations. Promax does. TargetQ does a target rotation with elements that can be missing (NA), or numeric (e.g., 0, 1). It uses the GPArotation package. target.rot does general target rotations to an arbitrary target matrix. The default target rotation is for an independent cluster solution. equamax facilitates the call to GPArotation to do an equamax rotation. Equamax, although available as a specific option within GPArotation is easier to call by name if using equamax. The varimin rotation suggested by Ertl (2013) is implemented by appropriate calls to GPArotation. } \usage{ faRotate(loadings,rotate="oblimin",...) bifactor(L, Tmat=diag(ncol(L)), normalize=FALSE, eps=1e-5, maxit=1000) biquartimin(L, Tmat=diag(ncol(L)), normalize=FALSE, eps=1e-5, maxit=1000) TargetQ(L, Tmat=diag(ncol(L)), normalize=FALSE, eps=1e-5, maxit=1000,Target=NULL) TargetT(L, Tmat=diag(ncol(L)), normalize=FALSE, eps=1e-5, maxit=1000,Target=NULL) Promax(x,m=4, normalize=FALSE, pro.m = 4) Procrustes(L,Target) #adapted from Niels Waler target.rot(x,keys=NULL) varimin(L, Tmat = diag(ncol(L)), normalize = FALSE, eps = 1e-05, maxit = 1000) vgQ.bimin(L) #called by bifactor vgQ.targetQ(L,Target=NULL) #called by TargetQ vgQ.varimin(L) #called by varimin equamax(L, Tmat=diag(ncol(L)), eps=1e-5, maxit=1000) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{A loadings matrix} \item{L}{A loadings matrix} \item{loadings}{A loadings matrix} \item{rotate}{Which rotation should be used?} \item{m}{the power to which to raise the varimax loadings (for Promax)} \item{pro.m}{the power to which to raise the various loadings in Promax.} \item{keys}{An arbitrary target matrix, can be composed of any weights, but probably -1,0, 1 weights. If missing, the target is the independent cluster structure determined by assigning every item to it's highest loaded factor.} \item{Target}{A matrix of values (mainly 0s, some 1s, some NAs) to which the matrix is transformed.} \item{Tmat}{An initial rotation matrix} \item{normalize}{parameter passed to optimization routine (GPForth in the GPArotation package and Promax)} \item{eps}{parameter passed to optimization routine (GPForth in the GPArotation package) } \item{maxit}{parameter passed to optimization routine (GPForth in the GPArotation package)} \item{...}{Other parameters to pass (e.g. to faRotate) include a Target list or matrix} } \details{The two most useful of these functions is probably biquartimin which implements the oblique bifactor rotation introduced by Jennrich and Bentler (2011). The second is TargetQ which allows for missing NA values in the target. Next best is the orthogonal case, bifactor. None of these seem to be implemented in GPArotation (yet). TargetT is an orthogonal target rotation function which allows for missing NA values in the target. faRotate is merely a convenient way to call the various GPArotation functions as well as the additional ones added here. The difference between biquartimin and bifactor is just that the latter is the orthogonal case which is documented in Jennrich and Bentler (2011). It seems as if these two functions are sensitive to the starting values and random restarts (modifying T) might be called for. bifactor output for the 24 cognitive variable of Holzinger matches that of Jennrich and Bentler as does output for the Chen et al. problem when fm="mle" is used and the Jennrich and Bentler solution is rescaled from covariances to correlations. Promax is a very direct adaptation of the stats::promax function. The addition is that it will return the interfactor correlations as well as the loadings and rotation matrix. varimin implements the varimin criterion proposed by Suitbert Ertl (2013). Rather than maximize the varimax criterion, it minimizes it. For a discussion of the benefits of this procedure, consult Ertel (2013). In addition, these functions will take output from either the factanal, \code{\link{fa}} or earlier (\code{\link{factor.pa}}, \code{\link{factor.minres}} or \code{\link{principal}}) functions and select just the loadings matrix for analysis. equamax is just a call to GPArotation's cFT function (for the Crawford Ferguson family of rotations. TargetQ implements Michael Browne's algorithm and allows specification of NA values. The Target input is a list (see examples). It is interesting to note how powerful specifying what a factor isn't works in defining a factor. That is, by specifying the pattern of 0s and letting most other elements be NA, the factor structure is still clearly defined. The target.rot function is an adaptation of a function of Michael Browne's to do rotations to arbitrary target matrices. Suggested by Pat Shrout. The default for target.rot is to rotate to an independent cluster structure (every items is assigned to a group with its highest loading.) target.rot will not handle targets that have linear dependencies (e.g., a pure bifactor model where there is a g loading and a group factor for all variables). } \value{ \item{loadings }{Oblique factor loadings} \item{rotmat}{The rotation matrix applied to the original loadings to produce the promax solution or the targeted matrix} \item{Phi}{The interfactor correlation matrix} } \references{ Ertel, S. (2013). Factor analysis: healing an ailing model. Universitatsverlag Gottingen. Hendrickson, A. E. and White, P. O, 1964, British Journal of Statistical Psychology, 17, 65-70. Jennrich, Robert and Bentler, Peter (2011) Exploratory Bi-Factor Analysis. Psychometrika, 1-13 } \author{William Revelle } \note{Promax is direct adaptation of the stats:promax function following suggestions to the R-help list by Ulrich Keller and John Fox. Further modified to do targeted rotation similar to a function of Michael Browne. varimin is a direct application of the GPArotation GPForth function modified to do varimin. } \note{The Target for TargetT can be a matrix, but for TartetQ must be a list. This seems to be a feature of GPArotation. } \seealso{ \code{\link{promax}}, \code{\link{fa}}, or \code{\link{principal}} for examples of data analysis and \code{\link{Holzinger}} or \code{\link{Bechtoldt}} for examples of bifactor data. \code{\link{factor.rotate}} for 'hand rotation'. } \examples{ jen <- sim.hierarchical() f3 <- fa(jen,3,rotate="varimax") f3 #not a very clean solution Promax(f3) #this obliquely rotates, but from the varimax target target.rot(f3) #this obliquely rotates to wards a simple structure target #compare this rotation with the solution from a targeted rotation aimed for #an independent cluster solution #now try a bifactor solution fb <-fa(jen,3,rotate="bifactor") fq <- fa(jen,3,rotate="biquartimin") #Suitbert Ertel has suggested varimin fm <- fa(jen,3,rotate="varimin") #the Ertel varimin fn <- fa(jen,3,rotate="none") #just the unrotated factors #compare them factor.congruence(list(f3,fb,fq,fm,fn)) # compare an oblimin with a target rotation using the Browne algorithm #note that we are changing the factor #order (this is for demonstration only) Targ <- make.keys(9,list(f1=1:3,f2=7:9,f3=4:6)) Targ <- scrub(Targ,isvalue=1) #fix the 0s, allow the NAs to be estimated Targ <- list(Targ) #input must be a list #show the target Targ fa(Thurstone,3,rotate="TargetQ",Target=Targ) #targeted oblique rotation #compare with oblimin f3 <- fa(Thurstone,3) #now try a targeted orthogonal rotation Targ <- make.keys(9,list(f1=1:3,f2=7:9,f3=4:6)) faRotate(f3$loadings,rotate="TargetT",Target=list(Targ)) #orthogonal } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{ multivariate } \keyword{ models }% __ONLY ONE__ keyword per line psych/man/sim.omega.Rd0000644000176200001440000003444214414343330014327 0ustar liggesusers\name{sim.omega} \alias{sim.omega} \alias{sim.general} \alias{sim.parallel} \title{Further functions to simulate psychological/psychometric data. } \description{A number of functions in the psych package will generate simulated data with particular structures. These functions include \code{\link{sim}} for a factor simplex, and \code{\link{sim.simplex}} for a data simplex, \code{\link{sim.circ}} for a circumplex structure, \code{\link{sim.congeneric}} for a one factor factor congeneric model, \code{\link{sim.dichot}} to simulate dichotomous items, \code{\link{sim.hierarchical}} to create a hierarchical factor model, \code{\link{sim.item}} a more general item simulation, \code{\link{sim.minor}} to simulate major and minor factors, \code{\link{sim.omega}} to test various examples of omega, \code{\link{sim.parallel}} to compare the efficiency of various ways of deterimining the number of factors. See the help pages for some more simulation functions here: \code{\link{sim.rasch}} to create simulated rasch data, \code{\link{sim.irt}} to create general 1 to 4 parameter IRT data by calling \code{\link{sim.npl}} 1 to 4 parameter logistic IRT or \code{\link{sim.npn}} 1 to 4 paramater normal IRT, \code{\link{sim.poly}} to create polytomous ideas by calling \code{\link{sim.poly.npn}} 1-4 parameter polytomous normal theory items or \code{\link{sim.poly.npl}} 1-4 parameter polytomous logistic items, and \code{\link{sim.poly.ideal}} which creates data following an ideal point or unfolding model by calling \code{\link{sim.poly.ideal.npn}} 1-4 parameter polytomous normal theory ideal point model or \code{\link{sim.poly.ideal.npl}} 1-4 parameter polytomous logistic ideal point model. \code{\link{sim.structural}} a general simulation of structural models, and \code{\link{sim.anova}} for ANOVA and lm simulations, and \code{\link{sim.VSS}}. Some of these functions are separately documented and are listed here for ease of the help function. See each function for more detailed help. } \usage{ sim.omega(nvar = 12, nfact = 3, n = 500, g = NULL, sem = FALSE, fbig = NULL, fsmall = c(-0.2, 0.2),bipolar = TRUE, om.fact = 3, flip = TRUE, option = "equal", ntrials = 10,threshold=NULL) sim.parallel(ntrials=10,nvar = c(12,24,36,48),nfact = c(1,2,3,4,6), n = c(200,400)) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{nvar}{Number of variables} \item{nfact}{Number of group factors in sim.general, sim.omega} \item{n}{ Number of cases to simulate. If n=0 or NULL, the population matrix is returned.} \item{g}{General factor correlations in sim.general and general factor loadings in sim.omega and sim.minor} \item{sem}{Should the sim.omega function do both an EFA omega as well as a CFA omega using the sem package?} \item{fbig}{Factor loadings for the main factors. Default is a simple structure with loadings sampled from (.8,.6) for nvar/nfact variables and 0 for the remaining. If fbig is specified, then each factor has loadings sampled from it.} \item{fsmall}{nvar/2 small factors are generated with loadings sampled from (-.2,0,.2)} \item{bipolar}{if TRUE, then positive and negative loadings are generated from fbig} \item{om.fact}{Number of factors to extract in omega} \item{flip}{In omega, should item signs be flipped if negative} \item{option}{In omega, for the case of two factors, how to weight them?} \item{threshold}{If raw=TRUE (n >0) and threshold is not NULL, convert the data to binary values, cut at threshold.} \item{ntrials}{Number of replications per level} } \details{Simulation of data structures is a very useful tool in psychometric research and teaching. By knowing ``truth" it is possible to see how well various algorithms can capture it. For a much longer discussion of the use of simulation in psychometrics, see the accompany vignettes. There are a number of simulation functions included in psych. sim.omega is just one of them. The default values for \code{\link{sim.structure}} is to generate a 4 factor, 12 variable data set with a simplex structure between the factors. This, and the simplex of items (\code{\link{sim.simplex}}) can also be converted in a STARS model with an autoregressive component (alpha) and a stable trait component (lambda). Two data structures that are particular challenges to exploratory factor analysis are the simplex structure and the presence of minor factors. Simplex structures \code{\link{sim.simplex}} will typically occur in developmental or learning contexts and have a correlation structure of r between adjacent variables and r^n for variables n apart. Although just one latent variable (r) needs to be estimated, the structure will have nvar-1 factors. An alternative version of the simplex is the State-Trait-Auto Regressive Structure (STARS) which has both a simplex state structure, with autoregressive path alpha and a trait structure with path lambda. This simulated in \code{\link{sim.simplex}} by specifying a non-zero lambda value. Many simulations of factor structures assume that except for the major factors, all residuals are normally distributed around 0. An alternative, and perhaps more realistic situation, is that the there are a few major (big) factors and many minor (small) factors. The challenge is thus to identify the major factors. \code{\link{sim.minor}} generates such structures. The structures generated can be thought of as havinga a major factor structure with some small correlated residuals. To make these simulations complete, the possibility of a general factor is considered. For simplicity, sim.minor allows one to specify a set of loadings to be sampled from for g, fmajor and fminor. Alternatively, it is possible to specify the complete factor matrix. Another structure worth considering is direct modeling of a general factor with several group factors. This is done using \code{\link{sim.general}}. Although coefficient \eqn{\omega}{\omega} is a very useful indicator of the general factor saturation of a unifactorial test (one with perhaps several sub factors), it has problems with the case of multiple, independent factors. In this situation, one of the factors is labelled as ``general'' and the omega estimate is too large. This situation may be explored using the \code{\link{sim.omega}} function with general left as NULL. If there is a general factor, then results from \code{\link{sim.omega}} suggests that omega estimated either from EFA or from SEM does a pretty good job of identifying it but that the EFA approach using Schmid-Leiman transformation is somewhat more robust than the SEM approach. The four irt simulations, sim.rasch, sim.irt, sim.npl and sim.npn, simulate dichotomous items following the Item Response model. sim.irt just calls either sim.npl (for logistic models) or sim.npn (for normal models) depending upon the specification of the model. The logistic model is \deqn{P(i,j) = \gamma + \frac{\zeta-\gamma}{1+ e^{\alpha(\delta-\theta)}}}{P(i,j) = \gamma + (\zeta-\gamma)/(1+ exp(\alpha(\delta-\theta)))} where \eqn{\gamma} is the lower asymptote or guesssing parameter, \eqn{\zeta} is the upper asymptote (normally 1), \eqn{\alpha} is item discrimination and \eqn{\delta} is item difficulty. For the 1 Paramater Logistic (Rasch) model, gamma=0, zeta=1, alpha=1 and item difficulty is the only free parameter to specify. For the 2PL and 2PN models, a = \eqn{\alpha} and d = \eqn{\delta} are specified. \cr For the 3PL or 3PN models, items also differ in their guessing parameter c =\eqn{\gamma}. \cr For the 4PL and 4PN models, the upper asymptote, z= \eqn{\zeta} is also specified. \cr (Graphics of these may be seen in the demonstrations for the \code{\link{logistic}} function.) The normal model (irt.npn calculates the probability using pnorm instead of the logistic function used in irt.npl, but the meaning of the parameters are otherwise the same. With the a = \eqn{\alpha} parameter = 1.702 in the logistic model the two models are practically identical. In parallel to the dichotomous IRT simulations are the poly versions which simulate polytomous item models. They have the additional parameter of how many categories to simulate. In addition, the \code{\link{sim.poly.ideal}} functions will simulate an ideal point or unfolding model in which the response probability varies by the distance from each subject's ideal point. Some have claimed that this is a more appropriate model of the responses to personality questionnaires. It will lead to simplex like structures which may be fit by a two factor model. The middle items form one factor, the extreme a bipolar factor. By default, the theta parameter is created in each function as normally distributed with mean mu=0 and sd=1. In the case where you want to specify the theta to be equivalent from another simulation or fixed for a particular experimental condition, either take the theta object from the output of a previous simulation, or create it using whatever properties are desired. The previous functions all assume one latent trait. Alternatively, we can simulate dichotomous or polytomous items with a particular structure using the sim.poly.mat function. This takes as input the population correlation matrix, the population marginals, and the sample size. It returns categorical items with the specified structure. Other simulation functions in psych are: \code{\link{sim.structure}} A function to combine a measurement and structural model into one data matrix. Useful for understanding structural equation models. Combined with \code{\link{structure.diagram}} to see the proposed structure. \code{\link{sim.congeneric}} A function to create congeneric items/tests for demonstrating classical test theory. This is just a special case of sim.structure. \code{\link{sim.hierarchical}} A function to create data with a hierarchical (bifactor) structure. \code{\link{sim.item}} A function to create items that either have a simple structure or a circumplex structure. \code{\link{sim.circ}} Create data with a circumplex structure. \code{\link{sim.dichot}} Create dichotomous item data with a simple or circumplex structure. \code{\link{sim.minor}} Create a factor structure for nvar variables defined by nfact major factors and nvar/2 ``minor" factors for n observations. Although the standard factor model assumes that K major factors (K << nvar) will account for the correlations among the variables \deqn{R = FF' + U^2} where R is of rank P and F is a P x K matrix of factor coefficients and U is a diagonal matrix of uniquenesses. However, in many cases, particularly when working with items, there are many small factors (sometimes referred to as correlated residuals) that need to be considered as well. This leads to a data structure such that \deqn{R = FF' + MM' + U^2} where R is a P x P matrix of correlations, F is a P x K factor loading matrix, M is a P x P/2 matrix of minor factor loadings, and U is a diagonal matrix (P x P) of uniquenesses. Such a correlation matrix will have a poor \eqn{\chi^2} value in terms of goodness of fit if just the K factors are extracted, even though for all intents and purposes, it is well fit. sim.minor will generate such data sets with big factors with loadings of .6 to .8 and small factors with loadings of -.2 to .2. These may both be adjusted. \code{\link{sim.parallel}} Create a number of simulated data sets using sim.minor to show how parallel analysis works. The general observation is that with the presence of minor factors, parallel analysis is probably best done with component eigen values rather than factor eigen values, even when using the factor model. \code{\link{sim.anova}} Simulate a 3 way balanced ANOVA or linear model, with or without repeated measures. Useful for teaching research methods and generating teaching examples. \code{\link{sim.multilevel}} To understand some of the basic concepts of multilevel modeling, it is useful to create multilevel structures. The correlations of aggregated data is sometimes called an 'ecological correlation'. That group level and individual level correlations are independent makes such inferences problematic. This simulation allows for demonstrations that correlations within groups do not imply, nor are implied by, correlations between group means. } \value{\code{link{sim.parallel}} returns the results of multiple simulations of fa.parallel for various combinations of the number of variables, numbers of factors, and sample size. \code{link{sim.general}} returns either a correlation matrix (if n is not specified) or a data matrix with a general factor. } \references{ \url{https://personality-project.org/r/r.omega.html} \cr Revelle, William. (in prep) An introduction to psychometric theory with applications in R. Springer. Working draft available at \url{https://personality-project.org/r/book/} Revelle, W. (1979). Hierarchical cluster analysis and the internal structure of tests. Multivariate Behavioral Research, 14, 57-74. (\url{https://personality-project.org/revelle/publications/iclust.pdf}) Revelle, W. and Zinbarg, R. E. (2009) Coefficients alpha, beta, omega and the glb: comments on Sijtsma. Psychometrika, 74, 1, 145-154. (\url{https://personality-project.org/revelle/publications/rz09.pdf} Waller, N. G. (2017) Direct {Schmid-Leiman} Transformations and Rank-Deficient Loadings Matrices. Psychometrika. DOI: 10.1007/s11336-017-9599-0 Zinbarg, R.E., Revelle, W., Yovel, I., & Li. W. (2005). Cronbach's Alpha, Revelle's Beta, McDonald's Omega: Their relations with each and two alternative conceptualizations of reliability. Psychometrika. 70, 123-133. \url{https://personality-project.org/revelle/publications/zinbarg.revelle.pmet.05.pdf} Zinbarg, R., Yovel, I. & Revelle, W. (2007). Estimating omega for structures containing two group factors: Perils and prospects. Applied Psychological Measurement. 31 (2), 135-157. Zinbarg, R., Yovel, I., Revelle, W. & McDonald, R. (2006). Estimating generalizability to a universe of indicators that all have one attribute in common: A comparison of estimators for omega. Applied Psychological Measurement, 30, 121-144. DOI: 10.1177/0146621605278814 } \author{ \url{https://personality-project.org/revelle.html} \cr Maintainer: William Revelle \email{ revelle@northwestern.edu } } \note{A few of the various sim functions are included here. } \seealso{\code{\link{omega}} } \examples{ #test <- sim.omega() } \keyword{ multivariate} \keyword{datagen} psych/man/diagram.Rd0000644000176200001440000001773114360317174014065 0ustar liggesusers\name{diagram} \Rdversion{1.1} \alias{diagram} \alias{dia.rect} \alias{dia.ellipse} \alias{dia.ellipse1} \alias{dia.arrow} \alias{dia.curve} \alias{dia.curved.arrow} \alias{dia.self} \alias{dia.shape} \alias{dia.triangle} \alias{dia.cone} \alias{multi.rect} \alias{multi.arrow} \alias{multi.curved.arrow} \alias{multi.self} \title{Helper functions for drawing path model diagrams} \description{Path models are used to describe structural equation models or cluster analytic output. These functions provide the primitives for drawing path models. Used as a substitute for some of the functionality of Rgraphviz.} \usage{ diagram(fit,...) dia.rect(x, y = NULL, labels = NULL, cex = 1, xlim = c(0, 1), ylim = c(0, 1), draw=TRUE, ...) dia.ellipse(x, y = NULL, labels = NULL, cex=1,e.size=.05, xlim=c(0,1), ylim=c(0,1),draw=TRUE, ...) dia.triangle(x, y = NULL, labels =NULL, cex = 1, xlim=c(0,1),ylim=c(0,1),...) dia.ellipse1(x,y,e.size=.05,xlim=c(0,1),ylim=c(0,1),draw=TRUE,...) dia.shape(x, y = NULL, labels = NULL, cex = 1, e.size=.05, xlim=c(0,1), ylim=c(0,1), shape=1, ...) dia.arrow(from,to,labels=NULL,scale=1,cex=1,adj=2,both=FALSE,pos=NULL,l.cex, gap.size,draw=TRUE,col="black",lty="solid",...) dia.curve(from,to,labels=NULL,scale=1,...) dia.curved.arrow(from,to,labels=NULL,scale=1,both=FALSE,dir=NULL,draw=TRUE,...) dia.self(location,labels=NULL,scale=.8,side=2,draw=TRUE,...) dia.cone(x=0, y=-2, theta=45, arrow=TRUE,curves=TRUE,add=FALSE,labels=NULL, xlim = c(-1, 1), ylim=c(-1,1),... ) multi.self(self.list,...) multi.arrow(arrows.list,...) multi.curved.arrow(curved.list,l.cex=NULL,...) multi.rect(rect.list,...) } \arguments{ \item{fit}{The results from a factor analysis \code{\link{fa}}, components analysis \code{\link{principal}}, omega reliability analysis, \code{\link{omega}}, cluster analysis \code{\link{iclust}}, topdown (bassAckward) \code{\link{bassAckward}} or confirmatory factor analysis, cfa, or structural equation model,sem, using the lavaan package.} \item{x}{x coordinate of a rectangle or ellipse} \item{y}{y coordinate of a rectangle or ellipse} \item{e.size}{The size of the ellipse (scaled by the number of variables} \item{labels}{Text to insert in rectangle, ellipse, or arrow} \item{cex}{adjust the text size} \item{col}{line color (normal meaning for plot figures)} \item{lty}{line type} \item{l.cex}{Adjust the text size in arrows, defaults to cex which in turn defaults to 1} \item{gap.size}{Tweak the gap in an arrow to be allow the label to be in a gap} \item{adj}{Where to put the label along the arrows (values are then divided by 4)} \item{both}{Should the arrows have arrow heads on both ends?} \item{scale}{modifies size of rectangle and ellipse as well as the curvature of curves. (For curvature, positive numbers are concave down and to the left} \item{from}{arrows and curves go from } \item{to}{arrows and curves go to} \item{location}{where is the rectangle?} \item{shape}{Which shape to draw} \item{xlim}{default ranges} \item{ylim}{default ranges} \item{draw}{Draw the text box} \item{side}{Which side of boxes should errors appear} \item{theta}{Angle in degrees of vectors} \item{arrow}{draw arrows for edges in dia.cone} \item{add}{if TRUE, plot on previous plot} \item{curves}{if TRUE, draw curves between arrows in dia.cone} \item{pos}{The position of the text in . Follows the text positions of 1, 2, 3, 4 or NULL} \item{dir}{Should the direction of the curve be calculated dynamically, or set as "up" or "left"} \item{\dots}{Most graphic parameters may be passed here} \item{self.list}{list saved from dia.self} \item{arrows.list}{lst saved from dia.arrow} \item{curved.list}{list saved from dia.curved.arrow} \item{rect.list}{list saved from dia.rect} } \details{The diagram function calls \code{\link{fa.diagram}}, \code{\link{omega.diagram}}, \code{\link{ICLUST.diagram}}, \code{\link{lavaan.diagram}} or \code{\link{bassAckward}}.diagram depending upon the class of the fit input. See those functions for particular parameter values. The remaining functions are the graphic primitives used by \code{\link{fa.diagram}}, \code{\link{structure.diagram}}, \code{\link{omega.diagram}}, \code{\link{ICLUST.diagram}} and \code{\link{het.diagram}} They create rectangles, ellipses or triangles surrounding text, connect them to straight or curved arrows, and can draw an arrow from and to the same rectangle. To speed up the plotting, dia.rect and dia.arrow can suppress the actual drawing and return the locations and values to plot. These values can then be directly called by text or rect with matrix input. This leads to an impressive increase in speed when doing many variables. The functions \code{\link{multi.rect}}, \code{\link{multi.self}}, \code{\link{multi.arrow}} and \code{\link{multi.curved.arrow}} will take the saved output from the appropriate primitives and then draw them all at once. Each shape (ellipse, rectangle or triangle) has a left, right, top and bottom and center coordinate that may be used to connect the arrows. Curves are double-headed arrows. By default they go from one location to another and curve either left or right (if going up or down) or up or down (going left to right). The direction of the curve may be set by dir="up" for left right curvature. The helper functions were developed to get around the infelicities associated with trying to install Rgraphviz and graphviz. These functions form the core of \code{\link{fa.diagram}},\code{\link{het.diagram}}. Better documentation will be added as these functions get improved. Currently the helper functions are just a work around for Rgraphviz. dia.cone draws a cone with (optionally) arrows as sides and centers to show the problem of factor indeterminacy. } \value{Graphic output} \author{William Revelle } \seealso{The diagram functions that use the dia functions: \code{\link{fa.diagram}}, \code{\link{structure.diagram}}, \code{\link{omega.diagram}}, and \code{\link{ICLUST.diagram}}. } \examples{ #first, show the primitives xlim=c(-2,10) ylim=c(0,10) plot(NA,xlim=xlim,ylim=ylim,main="Demonstration of diagram functions",axes=FALSE,xlab="",ylab="") ul <- dia.rect(1,9,labels="upper left",xlim=xlim,ylim=ylim) ml <- dia.rect(1,6,"middle left",xlim=xlim,ylim=ylim) ll <- dia.rect(1,3,labels="lower left",xlim=xlim,ylim=ylim) bl <- dia.rect(1,1,"bottom left",xlim=xlim,ylim=ylim) lr <- dia.ellipse(7,3,"lower right",xlim=xlim,ylim=ylim,e.size=.07) ur <- dia.ellipse(7,9,"upper right",xlim=xlim,ylim=ylim,e.size=.07) mr <- dia.ellipse(7,6,"middle right",xlim=xlim,ylim=ylim,e.size=.07) lm <- dia.triangle(4,1,"Lower Middle",xlim=xlim,ylim=ylim) br <- dia.rect(9,1,"bottom right",xlim=xlim,ylim=ylim) dia.curve(from=ul$left,to=bl$left,"double headed",scale=-1) dia.arrow(from=lr,to=ul,labels="right to left") dia.arrow(from=ul,to=ur,labels="left to right") dia.curved.arrow(from=lr,to=ll,labels ="right to left") dia.curved.arrow(to=ur,from=ul,labels ="left to right") dia.curve(ll$top,ul$bottom,"right") #for rectangles, specify where to point dia.curve(ll$top,ul$bottom,"left",scale=-1) #for rectangles, specify where to point dia.curve(mr,ur,"up") #but for ellipses, you may just point to it. dia.curve(mr,lr,"down") dia.curve(mr,ur,"up") dia.curved.arrow(mr,ur,"up") #but for ellipses, you may just point to it. dia.curved.arrow(mr,lr,"down") #but for ellipses, you may just point to it. dia.curved.arrow(ur$right,mr$right,"3") dia.curve(ml,mr,"across") dia.curve(ur$right,lr$right,"top down",scale =2) dia.curved.arrow(br$top,lr$right,"up") dia.curved.arrow(bl,br,"left to right") dia.curved.arrow(br,bl,"right to left",scale=-1) dia.arrow(bl,ll$bottom) dia.curved.arrow(ml,ll$right) dia.curved.arrow(mr,lr$top) #now, put them together in a factor analysis diagram v9 <- sim.hierarchical() f3 <- fa(v9,3,rotate="cluster") fa.diagram(f3,error=TRUE,side=3) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{multivariate} \keyword{hplot }% __ONLY ONE__ keyword per line psych/man/mixed.cor.Rd0000644000176200001440000002131314503602004014324 0ustar liggesusers\name{mixedCor} \alias{mixedCor} \alias{mixed.cor} \title{Find correlations for mixtures of continuous, polytomous, and dichotomous variables} \description{For data sets with continuous, polytomous and dichotmous variables, the absolute Pearson correlation is downward biased from the underlying latent correlation. mixedCor finds Pearson correlations for the continous variables, \code{\link{polychoric}}s for the polytomous items, \code{\link{tetrachoric}}s for the dichotomous items, and the \code{\link{polyserial}} or \code{\link{biserial}} correlations for the various mixed variables. Results include the complete correlation matrix, as well as the separate correlation matrices and difficulties for the polychoric and tetrachoric correlations. } \usage{ mixedCor(data=NULL,c=NULL,p=NULL,d=NULL,smooth=TRUE,correct=.5,global=TRUE,ncat=8, use="pairwise",method="pearson",weight=NULL) mixed.cor(x = NULL, p = NULL, d=NULL,smooth=TRUE, correct=.5,global=TRUE, ncat=8,use="pairwise",method="pearson",weight=NULL) #deprecated } \arguments{ \item{data}{The data set to be analyzed (either a matrix or dataframe)} \item{c}{The names (or locations) of the continuous variables) (may be missing)} \item{x}{A set of continuous variables (may be missing) or, if p and d are missing, the variables to be analyzed.} \item{p}{A set of polytomous items (may be missing)} \item{d}{A set of dichotomous items (may be missing)} \item{smooth}{If TRUE, then smooth the correlation matix if it is non-positive definite} \item{correct}{When finding tetrachoric correlations, what value should be used to correct for continuity?} \item{global}{For polychorics, should the global values of the tau parameters be used, or should the pairwise values be used. Set to local if errors are occurring.} \item{ncat}{The number of categories beyond which a variable is considered "continuous".} \item{use}{The various options to the \code{\link{cor}} function include "everything", "all.obs", "complete.obs", "na.or.complete", or "pairwise.complete.obs". The default here is "pairwise"} \item{method}{The correlation method to use for the continuous variables. "pearson" (default), "kendall", or "spearman"} \item{weight}{If specified, this is a vector of weights (one per participant) to differentially weight participants. The NULL case is equivalent of weights of 1 for all cases. } } \details{ This function is particularly useful as part of the Synthetic Apeture Personality Assessment (SAPA) (\url{https://www.sapa-project.org/}) data sets where continuous variables (age, SAT V, SAT Q, etc) and mixed with polytomous personality items taken from the International Personality Item Pool (IPIP) and the dichotomous experimental IQ items that have been developed as part of SAPA (see, e.g., Revelle, Wilt and Rosenthal, 2010 or Revelle, Dworak and Condon, 2020.). This is a very computationally intensive function which can be speeded up considerably by using multiple cores and using the parallel package. (See the note for timing comparisons.) This adjusts the number of cores to use when doing polychoric or tetrachoric. The greatest step in speed is going from 1 core to 2. This is about a 50\% savings. Going to 4 cores seems to have about at 66\% savings, and 8 a 75\% savings. The number of parallel processes defaults to 2 but can be modified by using the \code{\link{options}} command: options("mc.cores"=4) will set the number of cores to 4. Item response analyses using \code{\link{irt.fa}} may be done separately on the polytomous and dichotomous items in order to develop internally consistent scales. These scale may, in turn, be correlated with each other using the complete correlation matrix found by mixed.cor and using the \code{\link{score.items}} function. This function is not quite as flexible as the hetcor function in John Fox's polychor package. Note that the variables may be organized by type of data: continuous, polytomous, and dichotomous. This is done by simply specifying c, p, and d. This is advantageous in the case of some continuous variables having a limited number of categories because of subsetting. \code{\link{mixedCor}} is essentially a wrapper for \code{\link{cor}}, \code{\link{polychoric}}, \code{\link{tetrachoric}}, \code{\link{polydi}} and \code{\link{polyserial}}. It first identifies the types of variables, organizes them by type (continuous, polytomous, dichotomous), calls the appropriate correlation function, and then binds the resulting matrices together. } \value{ \item{rho}{The complete matrix} \item{rx}{The Pearson correlation matrix for the continuous items} \item{poly}{the polychoric correlation (poly$rho) and the item difficulties (poly$tau)} \item{tetra}{the tetrachoric correlation (tetra$rho) and the item difficulties (tetra$tau)} } \author{William Revelle} \note{mixedCor was designed for the SAPA project (\url{https://www.sapa-project.org/}) with large data sets with a mixture of continuous, dichotomous, and polytomous data. For smaller data sets, it is sometimes the case that the global estimate of the tau parameter will lead to unstable solutions. This may be corrected by setting the global parameter = FALSE. \code{\link{mixedCor}} was added in April, 2017 to be slightly more user friendly. \code{\link{mixed.cor}} was deprecated in February, 2018. When finding correlations between dummy coded SAPA data (e.g., of occupations), the real correlations are all slightly less than zero because of the ipsatized nature of the data. This leads to a non-positive definite correlation matrix because the matrix is no longer of full rank. Smoothing will correct this, even though this might not be desired. Turn off smoothing in this case. Note that the variables no longer need to be organized by type of data: first continuous, then polytomous, then dichotomous. However, this automatic detection will lead to problems if the variables such as age are limited to less than 8 categories but those category values differ from the polytomous items. The fall back is to specify x, p, and d. If the number of alternatives in the polychoric data differ and there are some dicthotomous data, it is advisable to set correct=0. Timing: For large data sets, \code{\link{mixedCor}} takes a while. Progress messages \code{\link{progressBar}} report what is happening but do not actually report the rate of progress. ( The steps are initializing, Pearson r, polychorics, tetrachoric, polydi). It is recommended to use the multicore option although the benefit between 2, 4 and 8 cores seems fairly small: For large data sets (e.g., 255K subjects, 950 variables), with 4 cores running in parallel (options("mc.cores=4") on a MacBook Pro with 2.8 Ghz Intel Core I7, it took 2,873/2,887 seconds elapsed time, 8,152/7,718 secs of user time, and 1,762/1,489 of system time (with and without smoothing). This is noticeabably better than the 4,842 elapsed time (7,313 user, 1,459 system) for 2 cores but not much worse than running 8 virtual cores, with an elapsed time of 2,629, user time of 13,460, and system time of 2,679. On a Macbook Pro with 2 physical cores and a 3.3 GHz Intel Cor I7, running 4 multicores took 4,423 seconds elapsed time, 12,781 seconds of user time, and 2,605 system time. Running with 2 multicores, took slightly longer: 6,193 seconds elapsed time, 10,099 user time and 2,413 system time. } \references{ W.Revelle, J.Wilt, and A.Rosenthal. Personality and cognition: The personality-cognition link. In A.Gruszka, G. Matthews, and B. Szymura, editors, Handbook of Individual Differences in Cognition: Attention, Memory and Executive Control, chapter 2, pages 27-49. Springer, 2010. W Revelle, D. M. Condon, J. Wilt, J.A. French, A. Brown, and L G. Elleman(2016) Web and phone based data collection using planned missing designs in Nigel G. Fielding and Raymond M. Lee and Grant Blank (eds) SAGE Handbook of Online Research Methods, Sage Publications, Inc. W. Revelle, E.M. Dworak and D.M. Condon (2020) Exploring the persome: The power of the item in understanding personality structure. Personality and Individual Differences, \code{/10.1016/j.paid.2020.109905} } \seealso{ \code{\link{polychoric}}, \code{\link{tetrachoric}}, \code{\link{scoreItems}}, \code{\link{scoreOverlap}} \code{\link{scoreIrt}} } \examples{ data(bfi) r <- mixedCor(data=psychTools::bfi[,c(1:5,26,28)]) r #this is the same as r <- mixedCor(data=psychTools::bfi,p=1:5,c=28,d=26) r #note how the variable order reflects the original order in the data #compare to raw Pearson #note that the biserials and polychorics are not attenuated rp <- cor(psychTools::bfi[c(1:5,26,28)],use="pairwise") lowerMat(rp) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{ multivariate } \keyword{models}psych/man/RMSEA.Rd0000644000176200001440000000224614444161615013323 0ustar liggesusers\name{RMSEA} \alias{RMSEA} \title{Root Mean Squared Error of Approximation from chisq, df, and n} \description{ Find the RMSEA from model chi square, degrees of freedom and number of observations. Show confidence intervals. } \usage{ RMSEA(chisq, dof, n.obs, alpha = 0.1) } \arguments{ \item{chisq}{The Chi square statistic from an analysis} \item{dof}{Degrees of freedom of the model} \item{n.obs}{Number of observations} \item{alpha}{alpha level for confidence intervals} } \details{RMSEA is just a chisquare adjusted by its degrees of freedom and the sample size. \eqn{\sqrt{chisq/(dof * (nobs)) -1/(n.obs -1)}}. It is given in most of the appropriate functions (e.g.,\code{\link{fa}}) and given here for completeness.} \value{ \item{RMSEA}{the estimated value} \item{RMSEA-L}{the lower bound} \item{RMSEA-U}{the upper bound} } \references{ Steiger, J. H.,and Lind, J. C. (1980). Statistically based tests for the number of common factors. Paper presented at the Annual Meeting of the Psychometric Society, Iowa City, IA. } \author{ William Revelle } \seealso{\code{\link{fa}}, \code{\link{omega}} , \code{\link{esem}} } \keyword{ multivariate } \keyword{ models}psych/man/score.multiple.choice.Rd0000644000176200001440000000561113715124770016652 0ustar liggesusers\name{score.multiple.choice} \alias{score.multiple.choice} \title{Score multiple choice items and provide basic test statistics } \description{Ability tests are typically multiple choice with one right answer. score.multiple.choice takes a scoring key and a data matrix (or data.frame) and finds total or average number right for each participant. Basic test statistics (alpha, average r, item means, item-whole correlations) are also reported. } \usage{ score.multiple.choice(key, data, score = TRUE, totals = FALSE, ilabels = NULL, missing = TRUE, impute = "median", digits = 2,short=TRUE,skew=FALSE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{key}{ A vector of the correct item alternatives} \item{data}{a matrix or data frame of items to be scored.} \item{score}{score=FALSE, just convert to right (1) or wrong (0).\cr score=TRUE, find the totals or average scores and do item analysis} \item{totals}{total=FALSE: find the average number correct \cr total=TRUE: find the total number correct} \item{ilabels}{item labels } \item{missing}{missing=TRUE: missing values are replaced with means or medians \cr missing=FALSE missing values are not scored } \item{impute}{impute="median", replace missing items with the median score \cr impute="mean": replace missing values with the item mean} \item{digits}{ How many digits of output } \item{short}{short=TRUE, just report the item statistics, \cr short=FALSE, report item statistics and subject scores as well} \item{skew}{Should the skews and kurtosi of the raw data be reported? Defaults to FALSE because what is the meaning of skew for a multiple choice item?} } \details{Basically combines \code{\link{score.items}} with a conversion from multiple choice to right/wrong. The item-whole correlation is inflated because of item overlap. The example data set is taken from the Synthetic Aperture Personality Assessment personality and ability test at \url{https://www.sapa-project.org/}. } \value{ \item{scores }{Subject scores on one scale} \item{missing }{Number of missing items for each subject} \item{item.stats}{scoring key, response frequencies, item whole correlations, n subjects scored, mean, sd, skew, kurtosis and se for each item} \item{alpha}{Cronbach's coefficient alpha} \item{av.r}{Average interitem correlation} } \author{William Revelle} \seealso{ \code{\link{score.items}}, \code{\link{omega}}} \examples{ data(psychTools::iqitems) iq.keys <- c(4,4,4, 6,6,3,4,4, 5,2,2,4, 3,2,6,7) score.multiple.choice(iq.keys,psychTools::iqitems) #just convert the items to true or false iq.tf <- score.multiple.choice(iq.keys,psychTools::iqitems,score=FALSE) describe(iq.tf) #compare to previous results } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{multivariate } \keyword{models} psych/man/phi.Rd0000644000176200001440000000536113211401437013223 0ustar liggesusers\name{phi} \alias{phi} \title{ Find the phi coefficient of correlation between two dichotomous variables } \description{Given a 1 x 4 vector or a 2 x 2 matrix of frequencies, find the phi coefficient of correlation. Typical use is in the case of predicting a dichotomous criterion from a dichotomous predictor. } \usage{ phi(t, digits = 2) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{t}{a 1 x 4 vector or a 2 x 2 matrix } \item{digits}{ round the result to digits } } \details{In many prediction situations, a dichotomous predictor (accept/reject) is validated against a dichotomous criterion (success/failure). Although a polychoric correlation estimates the underlying Pearson correlation as if the predictor and criteria were continuous and bivariate normal variables, and the tetrachoric correlation if both x and y are assumed to dichotomized normal distributions, the phi coefficient is the Pearson applied to a matrix of 0's and 1s. The phi coefficient was first reported by Yule (1912), but should not be confused with the \code{\link{Yule}} Q coefficient. For a very useful discussion of various measures of association given a 2 x 2 table, and why one should probably prefer the \code{\link{Yule}} Q coefficient, see Warren (2008). Given a two x two table of counts \cr \tabular{llll}{ \tab a \tab b \tab a+b (R1)\cr \tab c \tab d \tab c+d (R2)\cr \tab a+c(C1) \tab b+d (C2) \tab a+b+c+d (N) } convert all counts to fractions of the total and then \cr Phi = [a- (a+b)*(a+c)]/sqrt((a+b)(c+d)(a+c)(b+d) ) = \cr (a - R1 * C1)/sqrt(R1 * R2 * C1 * C2) This is in contrast to the Yule coefficient, Q, where \cr Q = (ad - bc)/(ad+bc) which is the same as \cr [a- (a+b)*(a+c)]/(ad+bc) Since the phi coefficient is just a Pearson correlation applied to dichotomous data, to find a matrix of phis from a data set involves just finding the correlations using cor or \code{\link{lowerCor}} or \code{\link{corr.test}}. } \value{phi coefficient of correlation } \author{William Revelle with modifications by Leo Gurtler } \references{Warrens, Matthijs (2008), On Association Coefficients for 2x2 Tables and Properties That Do Not Depend on the Marginal Distributions. Psychometrika, 73, 777-789. Yule, G.U. (1912). On the methods of measuring the association between two attributes. Journal of the Royal Statistical Society, 75, 579-652.} \seealso{ \code{\link{phi2tetra}}, \code{\link{AUC}}, \code{\link{Yule}}, \code{\link{Yule.inv}} \code{\link{Yule2phi}}, \code{\link{comorbidity}}, \code{\link{tetrachoric}} and \code{\link{polychoric}}} \examples{ phi(c(30,20,20,30)) phi(c(40,10,10,40)) x <- matrix(c(40,5,20,20),ncol=2) phi(x) } \keyword{multivariate }% at least one, from doc/KEYWORDS \keyword{models }% __ONLY ONE__ keyword per line psych/man/bigCor.Rd0000644000176200001440000001413214411327760013655 0ustar liggesusers\name{bigCor} \alias{bigCor} \title{Find large correlation matrices by stitching together smaller ones found more rapidly} \description{When analyzing many subjects (ie. 100,000 or more) with many variables (i.e. 1000 or more) core R can take a long time and sometime exceed memory limits (i.e. with 600K subjects and 6K variables). bigCor runs (in parallel if multicores are available) by breaking the variables into subsets (of size=size), finding all subset correlations, and then stitches the resulting matrices into one large matrix. Noticeable improvements in speed compared to cor. } \usage{ bigCor(x, size = NULL, use = "pairwise",cor="pearson") } \arguments{ \item{x}{A data set of numeric variables} \item{size}{What should the size of the subsets be? Defaults to NCOL (x)/20 } \item{use}{The standard correlation option. "pairwise" allows for missing data } \item{cor}{Defaults to Pearson correlations, alterative is polychoric. } } \details{ The data are divided into subsets of size=size. Correlations are then found for each subset and pairs of subsets. The basic loop loops over the subsets. When the size is a integer subset of the number of variables and is a multiple of the number of cores, the multiple cores will be used more. Notice the benefit of 660/80 versus 660/100. But this breaks down if we try 660/165. Further notice the benefit when using a smaller subset (55) which led to the 4 cores being used more. Timings (in seconds) for various problems with 645K subjects on an 8 core Mac Book Pro with a 2.4 GHZ Intell core i9. options(mc.cores=4) (Because we have 8 we can work at the same time as we test this.) First test it with 644,495 subjects and 1/10 of the number of possible variables. Then test it for somewhat fewer variables. \tabular{lllll}{ Variables \tab size \tab 2 cores \tab 4 cores \tab compared to normal cor function \cr 660 \tab 100 \tab 430 \tab 434 \tab 430 \cr 660 \tab 80 \tab 600 \tab 348 \tab notice the improvement with 8ths \cr 660 \tab 165 \tab \tab 666 \tab (Stitching seems to have been very slow) \cr 660 \tab 55 \tab \tab 303 \tab Even better if we break it into 12ths! \cr \cr 500 \tab 100 \tab \tab 332 \tab 322 secs \cr \cr 480 \tab 120 \tab 408 \tab 365 \tab 315 Better to change the size \cr 480 \tab 60 \tab 358 \tab 206 \tab This leads to 8 splits \cr \cr } We also test it with fewer subjects. Time is roughly linear with number of subjects. \tabular{lllll}{ Variables \tab size \tab 2 cores \tab 4 cores \tab compared to normal cor function Further comparisons with fewer subjects (100K) \cr \cr 480 \tab 60 \tab 57 \tab 31 \tab 47 with normal cor. Note the effect of n subjects! \cr 200 \tab 50 \tab 19.9 \tab 13.6 \tab 27.13 \cr 100 \tab 25 \tab 4.6 \tab 3.5 \tab 5.85 \cr \cr } One last comparison, 10,000 subjects, showing the effect of getting the proper size value. You can tune on these smaller sets of subjects before trying large problems. \tabular{lllll}{ Variables \tab size \tab 2 cores \tab 4 cores \tab compared to normal cor function \cr 480 \tab 120 \tab 5.2 \tab 5.1 \tab 4.51 \cr 480 \tab 60 \tab 2.9 \tab 2.88 \tab 4.51 \cr 480 \tab 30 \tab 2.65 \tab 2.691 \cr 480 \tab 20 \tab 2.73 \tab 2.77 \tab \cr 480 \tab 10 \tab 2.82 \tab 2.97 \tab too many splits? \cr 200 \tab 50 \tab 2.18 \tab 1.39 \tab 2.47 for normal cor (1.44 with 8 cores 2.99 with 1 core) \cr 200 \tab 25 \tab 1.2 \tab 1.17 \tab 2.47 for normal cor \cr (1.16 with 8 cores, 1.17 with 1 core) \cr 100 \tab 25 \tab .64 \tab .52 \tab .56 \cr } Timings updated in 2/23 using a MacBook Pro with M1 max chip 10,000 subjects 953 variables suggests that a very small size (e.g. 20) is probably optimal \tabular{lllllll}{ Variables \tab size \tab 2 cores \tab 4 cores \tab 8 cores \tab compared to normal cor function \cr 953 \tab 20 \tab 7.92 \tab 4.55 \tab 2.88 \tab 11.04 \cr 953 \tab 30 \tab 7.98 \tab 4.88 \tab 3.15 \tab 11.04 \cr 953 \tab 40 \tab 8.22 \tab 5.14 \tab 3.63 \tab 11.16 \cr 953 \tab 60 \tab 8.51 \tab 5.59 \tab 3.93 \tab 11.16 \cr 953 \tab 80 \tab 8.31 \tab 5.59 \tab 4.14 \tab 11.16 \cr 953 \tab 120 \tab 8.33 \tab 6.22 \4.75 \tab 11.16 \cr } I tried switching the BLAS from the default to the vecLib and this did not seem to make any meaningful difference. Time is roughly linear with the number of cases and increases by the square of the number of variables. The benefit of more cores is noticeable. It seems as if with 4 cores, we should use sizes to split it into 8 or 12 sets. Otherwise we don't actually use all cores efficiently. There is some overhead in using multicores. So for smaller problems (e.g. the 4,000 cases of the 145 items of the psychTools::spi data set, the timings are roughly .14 seconds for bigCor (default size) and .10 for normal cor. For small problems, this actually gets worse as we use more cores. The cross over point seems to be at roughly 5K subjects. (updated these timings to recognize the M1 Max chip. An increase of 4x in speed! They had been .44 and .36.) } \value{ The correlation matrix } \references{ Examples of large data sets with massively missing data are taken from the SAPA project. e.g., William Revelle, Elizabeth M. Dworak, and David M. Condon (2021) Exploring the persome: The power of the item in understanding personality structure. Personality and Individual Differences, 169, \doi{10.1016/j.paid.2020.109905} David Condon (2018)The SAPA Personality Inventory: an empirically-derived, hierarchically-organized self-report personality assessment model. PsyArXiv /sc4p9/ \doi{10.31234/osf.io/sc4p9} } \author{William Revelle } \seealso{\code{\link{pairwiseCountBig}} which will do the same, but find the count of observations per cell.} \examples{ R <- bigCor(psychTools::bfi,10) #compare the results with r.bfi <- cor(psychTools::bfi,use="pairwise") all.equal(R,r.bfi) } \keyword{ models }% at least one, from doc/KEYWORDS \keyword{multivariate }% __ONLY ONE__ keyword per line % Use only one keyword per line. psych/man/lmCor.Rd0000644000176200001440000005253214537105673013540 0ustar liggesusers\name{lmCor} \alias{setCor} \alias{lmCor} \alias{lmCor.diagram} \alias{lmDiagram} \alias{set.cor} \alias{mat.regress} \alias{matReg} \alias{crossValidation} \alias{crossValidationBoot} \alias{matPlot} \title{Multiple Regression, Canonical and Set Correlation from matrix or raw input} \description{A generalization of the lm function to correlation/covariance matrix input. Given a correlation matrix or a matrix or dataframe of raw data, find the multiple regressions and draw a path diagram relating a set of y variables as a function of a set of x variables. A set of covariates (z) can be partialled from the x and y sets. Regression diagrams are automatically included. The model can be specified in conventional formula form, or in terms of x variables and y variables. Multiplicative models (interactions) and quadratic terms may be specified in the formula mode if using raw data. By default, the data will be zero centered before finding the interactions. Will also find Cohen's Set Correlation between a predictor set of variables (x) and a criterion set (y). Also finds the canonical correlations between the x and y sets. Associated functions allow for cross validation of these regression models. } \usage{ lmCor(y,x,data,z=NULL,n.obs=NULL,use="pairwise",std=TRUE,square=FALSE, main="Regression Models",plot=TRUE,show=FALSE,zero=TRUE, alpha = .05,part=FALSE) #the prior name setCor(y,x,data,z=NULL,n.obs=NULL,use="pairwise",std=TRUE,square=FALSE, main="Regression Models",plot=TRUE,show=FALSE,zero=TRUE, alpha = .05,part=FALSE) lmDiagram(sc,main="Regression model",digits=2,show=FALSE,cex=1,l.cex=1,...) lmCor.diagram(sc,main="Regression model",digits=2,show=FALSE,cex=1,l.cex=1,...) #an alias to setCor or lmCor set.cor(y,x,data,z=NULL,n.obs=NULL,use="pairwise",std=TRUE,square=FALSE, main="Regression Models",plot=TRUE,show=FALSE,zero=TRUE,part=FALSE) mat.regress(y, x,data, z=NULL,n.obs=NULL,use="pairwise",square=FALSE) #the old form #does not handle formula input matReg(x,y,C,m=NULL,z=NULL,n.obs=0,means=NULL,std=FALSE,raw=TRUE,part=FALSE) #useful helper functions crossValidation(model,data,options=NULL,select=NULL) crossValidationBoot(y,x,data,n.iter=100) matPlot(x, type = "b", minlength=6, xlas=0,legend=NULL, lab=NULL,pch=16,col=1:6,lty=NULL,...) } \arguments{ \item{y}{Three options: 'formula' form (similar to lm) or either the column numbers of the y set (e.g., c(2,4,6) or the column names of the y set (e.g., c("Flags","Addition"). See notes and examples for each.} \item{x}{ either the column numbers of the x set (e.g., c(1,3,5) or the column names of the x set (e.g. c("Cubes","PaperFormBoard"). x and y may also be set by use of the formula style of lm.} \item{data}{A matrix or data.frame of correlations or, if not square, of raw data. Missing values are allowed (see use)} \item{C}{A variance/covariance matrix, or a correlation matrix} \item{m}{The column name or numbers of the set of mediating variables (see \code{\link{mediate}}).} \item{z}{the column names or numbers of the set of covariates. } \item{n.obs}{If specified, then confidence intervals, etc. are calculated, not needed if raw data are given.} \item{n.iter}{Number of bootstrap resamples to take in crossValidationBoot} \item{use}{Find the correlations using "pairwise" (default) or just use "complete" cases (to match the lm function)} \item{std}{Report standardized betas (based upon the correlations) or raw bs (based upon covariances)} \item{part}{z is specified should part (TRUE) or partial (default) correlations be found.} \item{raw}{Are data from a correlation matrix or data matrix?} \item{means}{A vector of means for the data in matReg if giving matrix input} \item{main}{The title for lmCor.diagram} \item{square}{if FALSE, then square matrices are treated as correlation matrices not as data matrices. In the rare case that one has as many cases as variables, then set square=TRUE.} \item{sc}{The output of lmCor or setCor may be used for drawing diagrams} \item{digits}{How many digits should be displayed in the lmCor.diagram?} \item{show}{Show the unweighted matrix correlation between the x and y sets?} \item{zero}{zero center the x variables before finding the interaction terms.} \item{alpha}{p value of the confidence intervals for the beta coefficients} \item{plot}{By default, lmCor makes a plot of the results, set to FALSE to suppress the plot} \item{cex}{Text size of boxes displaying the variables in the diagram} \item{l.cex}{Text size of numbers in arrows, defaults to cex} \item{...}{Additional graphical parameters for lmCor.diagram} \item{model}{The resulting object from lmCor or bestScales} \item{options}{If using a bestScales object, which set of keys to use ("best.key","weights","optimal.key","optimal.weights")} \item{select}{Not yet implemented option to crossValidation} \item{type}{type="b" draws both lines and points} \item{xlas}{Orientation of the x labels (3 to make vertical)} \item{minlength}{When abbreviating x labels how many characters as a minimum} \item{legend}{If not NULL, the draw a legend at the appropriate location} \item{pch}{What plot character(s) to use in matPlot (defaults to 18. should be less than 26 -nvar)} \item{col}{colors to use in matPlot} \item{lty}{line types to use in matPlot} \item{lab}{What labels should be supplied to the lines in matPlot. (Defaults to colnames of the variables.)} } \details{ Although it is more common to calculate multiple regression and canonical correlations from the raw data, it is, of course, possible to do so from a matrix of correlations or covariances. In this case, the input to the function is a square covariance or correlation matrix, as well as the column numbers (or names) of the x (predictor), y (criterion) variables, and if desired z (covariates). The function will find the correlations if given raw data. Input is either conventional formula mode, or the set of y variables and the set of x variables. Formula mode can be written in the standard formula style of lm (see last example). In this case, pairwise or higher interactions (product terms) may also be specified. By default, when finding product terms, the predictive variables are zero centered (Cohen, Cohen, West and Aiken, 2003), although this option can be turned off (zero=FALSE) to match the results of \code{\link{lm}} or the results discussed in Hayes (2013). Covariates to be removed are specified by a negative sign in the formula input or by using the z variable. Note that when specifying covariates, the regressions are done as if the regressions were done on the partialled variables. This means that the degrees of freedom and the R2 reflect the regressions of the partialled variables. (See the examples.) If using covariates, should they be removed from the dependent as well as the independent variables (part = FALSE, the default which means partial correlations or just the independent variables (part=TRUE or part aka semi-partial correlations). The difference between part correlations and partial correlations is whether the variance of the covariate is removed from both the DV and IVs (a partial correlation) or from just the IVs (a part correlation). The slopes of the regressions remain the same, but the amount of variance in the DV (and thus the standard errors) is larger when using part correlations. Using partial correlations for the covariates is equivalent to using the covariate in the regression when interpreting the effects of the other variables. The output is a set of multiple correlations, one for each dependent variable in the y set, as well as the set of canonical correlations. Since 2.3.12, the canonical loadings for the X and Y variables are returned (Xmat,YMat). An additional output is the R2 found using Cohen's set correlation (Cohen, 1982). This is a measure of how much variance and the x and y set share. Cohen (1982) introduced the set correlation, a multivariate generalization of the multiple correlation to measure the overall relationship between two sets of variables. It is an application of canonicial correlation (Hotelling, 1936) and \eqn{1 - \prod(1-\rho_i^2)} where \eqn{\rho_i^2} is the squared canonical correlation. Set correlation is the amount of shared variance (R2) between two sets of variables. With the addition of a third, covariate set, set correlation will find multivariate R2, as well as partial and semi partial R2. (The semi and bipartial options are not yet implemented.) Details on set correlation may be found in Cohen (1982), Cohen (1988) and Cohen, Cohen, Aiken and West (2003). R2 between two sets is just \deqn{R^2 = 1- \frac{\left | R_{yx} \right |}{\left | R_y \right | \left |R_x\right |} = 1 - \prod(1-\rho_i^2) }{R2 = 1- |R| /(|Ry| * |Rx|)} where R is the complete correlation matrix of the x and y variables and Rx and Ry are the two sets involved. Unfortunately, the R2 is sensitive to one of the canonical correlations being very high. An alternative, T2, is the proportion of additive variance and is the average of the squared canonicals. (Cohen et al., 2003), see also Cramer and Nicewander (1979). This average, because it includes some very small canonical correlations, will tend to be too small. Cohen et al. admonition is appropriate: "In the final analysis, however, analysts must be guided by their substantive and methodological conceptions of the problem at hand in their choice of a measure of association." ( p613). Yet another measure of the association between two sets is just the simple, unweighted correlation between the two sets. That is, \deqn{R_{uw} =\frac{ 1 R_{xy} 1' }{(1R_{yy}1')^{.5} (1R_{xx}1')^{.5}} }{Ruw=1Rxy1' / (sqrt(1Ryy1'* 1Rxx1'))} where Rxy is the matrix of correlations between the two sets. This is just the simple (unweighted) sums of the correlations in each matrix. This technique exemplifies the robust beauty of linear models and is particularly appropriate in the case of one dimension in both x and y, and will be a drastic underestimate in the case of items where the betas differ in sign. When finding the unweighted correlations, as is done in \code{\link{alpha}}, items are flipped so that they all are positively signed. A typical use in the SAPA project is to form item composites by clustering or factoring (see \code{\link{fa}},\code{\link{ICLUST}}, \code{\link{principal}}), extract the clusters from these results (\code{\link{factor2cluster}}), and then form the composite correlation matrix using \code{\link{cluster.cor}}. The variables in this reduced matrix may then be used in multiple R procedures using \code{\link{lmCor}}. Although the overall matrix can have missing correlations, the correlations in the subset of the matrix used for prediction must exist. If the number of observations is entered, then the conventional confidence intervals, statistical significance, and shrinkage estimates are reported. If the input is rectangular (not square), correlations or covariances are found from the data. By default, these are pairwise complete correlations. This may be specified in the 'use' option. The print function reports t and p values for the beta weights, the summary function just reports the beta weights. The Variance Inflation Factor is reported but should be taken with the normal cautions of interpretation discussed by Guide and Ketokivi. That is to say, VIF > 10 is not a magic cuttoff to define colinearity. It is merely 1/(1-smc(R(x)). The Guide and Ketokivi article is well worth reading for all who want to use various regression models. \code{\link{anova.psych}} allows for comparisons between alternative models. \code{\link{crossValidation}} can be used to take the results from \code{\link{lmCor}} or \code{\link{bestScales}} and apply the weights to a different data set. \code{\link{crossValidationBoot}} will bootstrap resample data and perform a lmCor and then return the crossValidation R values. Although it will handle covariates (z), it will not handle product terms. \code{\link{matPlot}} can be used to plot the crossValidation values (just a call to matplot with the xaxis given labels). matPlot has been improved to draw legends and to allow for specifications of the line types. \code{\link{lmCorLookup}} will sort the beta weights and report them with item contents if given a dictionary. \code{\link{matReg}} is primarily a helper function for \code{\link{mediate}} but is a general multiple regression function given a covariance matrix and the specified x, y and z variables. Its output includes betas, se, t, p and R2. The call includes m for mediation variables, but these are only used to adjust the degrees of freedom. \code{\link{matReg}} does not work on data matrices, nor does it take formula input. It is really just a helper function for \code{\link{mediate}} } \value{ \item{beta }{the beta weights for each variable in X for each variable in Y} \item{R}{The multiple R for each equation (the amount of change a unit in the predictor set leads to in the criterion set). } \item{R2 }{The multiple R2 (\% variance acounted for) for each equation} \item{VIF}{The Variance Inflation Factor which is just 1/(1-smc(x))} \item{se}{Standard errors of beta weights (if n.obs is specified)} \item{t}{t value of beta weights (if n.obs is specified)} \item{Probability}{Probability of beta = 0 (if n.obs is specified)} \item{shrunkenR2}{Estimated shrunken R2 (if n.obs is specified)} \item{setR2}{The multiple R2 of the set correlation between the x and y sets} \item{residual}{The residual correlation matrix of Y with x and z removed} \item{ruw}{The unit weighted multiple correlation for each dependent variable} \item{Ruw}{The unit weighted set correlation} \item{cancor}{The canonical correlations between the X and Y set.} \item{Xmat}{The path coefficients from the X variables to the canonical variables.} \item{Ymat}{The path coefficients from the Y variables to the canonical variables.} } \author{William Revelle \cr Maintainer: William Revelle } \references{ J. Cohen (1982) Set correlation as a general multivariate data-analytic method. Multivariate Behavioral Research, 17(3):301-341. J. Cohen, P. Cohen, S.G. West, and L.S. Aiken. (2003) Applied multiple regression/correlation analysis for the behavioral sciences. L. Erlbaum Associates, Mahwah, N.J., 3rd ed edition. H. Hotelling. (1936) Relations between two sets of variates. Biometrika 28(3/4):321-377. E.Cramer and W. A. Nicewander (1979) Some symmetric, invariant measures of multivariate association. Psychometrika, 44:43-54. V. Daniel R. Guide Jr. and M. Ketokivim (2015) Notes from the Editors: Redefining some methodological criteria for the journal. Journal of Operations Management. 37. v-viii. B.G. Tabacnick and L.S. Fidell (2007) Using Multivariate Statistics. Allyn and Bacon. } \note{As of April 30, 2011, the order of x and y was swapped in the call to be consistent with the general y ~ x syntax of the lm and aov functions. In addition, the primary name of the function was switched to setCor from mat.regress to reflect the estimation of the set correlation. This was changed to lmCor in the March, 2023 release to make the equivalence to lm more obvious. In October, 2017 I added the ability to specify the input in formula mode and allow for higher level and multiple interactions. The denominator degrees of freedom for the set correlation does not match that reported by Cohen et al., 2003 in the example on page 621 but does match the formula on page 615, except for the typo in the estimation of F (see Cohen 1982). The difference seems to be that they are adding in a correction factor of df 2 = df2 + df1. Following a suggestion by Keith Widaman, when zero centering for product terms, the y variables are no longer zero centered. This puts the regression in the units of the DV. (2/22/22). In December, 2023 I added the Xmat and Ymat coefficients for those who want to examine canonical correlations. This follows the example in Tabachnick and Fidell. Also added \code{\link{cancorDiagram}} to show the canonical path coefficients. } \seealso{\code{\link{mediate}} for an alternative regression model with 'mediation'. \code{\link{predict}} to find predicted values given regression weights. \code{\link{partial.r}} for partial and part correlations. \code{\link{cluster.cor}}, \code{\link{factor2cluster}},\code{\link{principal}},\code{\link{ICLUST}}, \code{link{cancor}} and cca in the yacca package. \code{\link{GSBE}} for further demonstrations of mediation and moderation. } \examples{ #First compare to lm using data input summary(lm(rating ~ complaints + privileges, data = attitude)) lmCor(rating ~ complaints + privileges, data = attitude, std=FALSE) #do not standardize z.attitude <- data.frame(scale(attitude)) #standardize the data before doing lm summary(lm(rating ~ complaints + privileges, data = z.attitude)) #regressions on z scores lmCor(rating ~ complaints + privileges, data = attitude) #by default we standardize and # the results are the same as the standardized lm R <- cor(attitude) #find the correlations #Do the regression on the correlations #Note that these match the regressions on the standard scores of the data lmCor(rating ~ complaints + privileges, data =R, n.obs=30) #now, partial out learning and critical lmCor(rating ~ complaints + privileges - learning - critical, data =R, n.obs=30) #compare with the full regression: lmCor(rating ~ complaints + privileges + learning + critical, data =R, n.obs=30) #Canonical correlations: #The first Kelley data set from Hotelling kelley1 <- structure(c(1, 0.6328, 0.2412, 0.0586, 0.6328, 1, -0.0553, 0.0655, 0.2412, -0.0553, 1, 0.4248, 0.0586, 0.0655, 0.4248, 1), .Dim = c(4L, 4L), .Dimnames = list(c("reading.speed", "reading.power", "math.speed", "math.power"), c("reading.speed", "reading.power", "math.speed", "math.power"))) lowerMat(kelley1) mod1 <- lmCor(y = math.speed + math.power ~ reading.speed + reading.power, data = kelley1, n.obs=140) mod1$cancor #Hotelling reports .3945 and .0688 we get 0.39450592 0.06884787 #the second Kelley data from Hotelling kelley <- structure(list(speed = c(1, 0.4248, 0.042, 0.0215, 0.0573), power = c(0.4248, 1, 0.1487, 0.2489, 0.2843), words = c(0.042, 0.1487, 1, 0.6693, 0.4662), symbols = c(0.0215, 0.2489, 0.6693, 1, 0.6915), meaningless = c(0.0573, 0.2843, 0.4662, 0.6915, 1)), .Names = c("speed", "power", "words", "symbols", "meaningless"), class = "data.frame", row.names = c("speed", "power", "words", "symbols", "meaningless")) lowerMat(kelley) mod2 <- lmCor(power + speed ~ words + symbols + meaningless,data=kelley) #formula mode #lmCor(y= 1:2,x = 3:5,data = kelley) #order of variables input cancorDiagram(mod2, cut=.05) #Hotelling reports canonical correlations of .3073 and .0583 or squared correlations of # 0.09443329 and 0.00339889 vs. our values of cancor = 0.3076 0.0593 with squared values #of 0.0946 0.0035, lmCor(y=c(7:9),x=c(1:6),data=Thurstone,n.obs=213) #easier to just list variable #locations if we have long names #now try partialling out some variables lmCor(y=c(7:9),x=c(1:3),z=c(4:6),data=Thurstone) #compare with the previous #compare complete print out with summary printing sc <- lmCor(SATV + SATQ ~ gender + education,data=sat.act) # regression from raw data sc summary(sc) lmCor(Pedigrees ~ Sentences + Vocabulary - First.Letters - Four.Letter.Words , data=Thurstone) #showing formula input with two covariates #Do some regressions with real data (rather than correlation matrices) lmCor(reaction ~ cond + pmi + import, data = Tal.Or) #partial out importance lmCor(reaction ~ cond + pmi - import, data = Tal.Or, main="Partial out importance") #compare with using lm by partialling mod1 <- lm(reaction ~ cond + pmi + import, data = Tal.Or) reaction.import <- lm(reaction~import,data=Tal.Or)$resid cond.import <- lm(cond~import,data=Tal.Or)$resid pmi.import <- lm(pmi~import,data=Tal.Or)$resid mod.partial <- lm(reaction.import ~ cond.import + pmi.import) summary(mod.partial) #lm uses raw scores, so set std = FALSE for lmCor print(lmCor(y = reaction ~ cond + pmi - import, data = Tal.Or,std = FALSE, main = "Partial out importance"),digits=4) #notice that the dfs of the partial approach using lm are 1 more than the lmCor dfs #Show how to find quadratic terms sc <- lmCor(reaction ~ cond + pmi + I(import^2), data = Tal.Or) sc #pairs.panels(sc$data) #show the SPLOM of the data #Consider an example of a derivation and cross validation sample set.seed(42) ss <- sample(1:2800,1400) model <- lmCor(y=26:28,x=1:25,data=bfi[ss,],plot=FALSE) original.fit <- crossValidation(model,bfi[ss,]) #the derivation set cross.fit <- crossValidation(model,bfi[-ss,]) #the cross validation set summary(original.fit) summary(cross.fit) predicted <- predict(model,bfi[-ss,]) cor2(predicted,bfi[-ss,26:28]) #these are the correlations of the predicted with observed #now do it for the correlations matrices - these should be the same R1 <- lowerCor(bfi[ss,], show=FALSE) R2 <- lowerCor(bfi[-ss,], show=FALSE) mod1 <- lmCor(y=26:28,x=1:25,data=R1,plot=FALSE) original <- crossValidation(model,R1) #the derivation set capitalizes on chance cross <- crossValidation(model,R2) #the cross validation set -- values are lower summary(original) #inflated by chance summary(cross) #cross validated values are better estimates } \keyword{ models }% at least one, from doc/KEYWORDS \keyword{multivariate }% __ONLY ONE__ keyword per line psych/man/principal.Rd0000744000176200001440000003321314346114150014425 0ustar liggesusers\name{principal} \alias{principal} \alias{pca} \title{ Principal components analysis (PCA)} \description{Does an eigen value decomposition and returns eigen values, loadings, and degree of fit for a specified number of components. Basically it is just doing a principal components analysis (PCA) for n principal components of either a correlation or covariance matrix. Can show the residual correlations as well. The quality of reduction in the squared correlations is reported by comparing residual correlations to original correlations. Unlike princomp, this returns a subset of just the best nfactors. The eigen vectors are rescaled by the sqrt of the eigen values to produce the component loadings more typical in factor analysis. } \usage{ principal(r, nfactors = 1, residuals = FALSE,rotate="varimax",n.obs=NA, covar=FALSE, scores=TRUE,missing=FALSE,impute="median",oblique.scores=TRUE,method="regression", use ="pairwise",cor="cor",correct=.5,weight=NULL,...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{r}{a correlation matrix. If a raw data matrix is used, the correlations will be found using pairwise deletions for missing values.} \item{nfactors}{Number of components to extract } \item{residuals}{ FALSE, do not show residuals, TRUE, report residuals } \item{rotate}{"none", "varimax", "quartimax", "promax", "oblimin", "simplimax", and "cluster" are possible rotations/transformations of the solution. See \code{\link{fa}} for all rotations avaiable.} \item{n.obs}{Number of observations used to find the correlation matrix if using a correlation matrix. Used for finding the goodness of fit statistics.} \item{covar}{If false, find the correlation matrix from the raw data or convert to a correlation matrix if given a square matrix as input.} \item{scores}{If TRUE, find component scores} \item{missing}{if scores are TRUE, and missing=TRUE, then impute missing values using either the median or the mean} \item{impute}{"median" or "mean" values are used to replace missing values} \item{oblique.scores}{If TRUE (default), then the component scores are based upon the structure matrix. If FALSE, upon the pattern matrix.} \item{method}{Which way of finding component scores should be used. The default is "regression"} \item{weight}{If not NULL, a vector of length n.obs that contains weights for each observation. The NULL case is equivalent to all cases being weighted 1.} \item{use}{How to treat missing data, use="pairwise" is the default". See cor for other options.} \item{cor}{How to find the correlations: "cor" is Pearson", "cov" is covariance, "tet" is tetrachoric, "poly" is polychoric, "mixed" uses mixedCor for a mixture of tetrachorics, polychorics, Pearsons, biserials, and polyserials, Yuleb is Yulebonett, Yuleq and YuleY are the obvious Yule coefficients as appropriate} \item{correct}{When doing tetrachoric, polycoric, or mixed cor, how should we treat empty cells. (See the discussion in the help for tetrachoric.)} \item{...}{other parameters to pass to functions such as factor.scores or the various rotation functions. } } \details{Useful for those cases where the correlation matrix is improper (perhaps because of SAPA techniques). There are a number of data reduction techniques including principal components analysis (PCA) and factor analysis (EFA). Both PC and FA attempt to approximate a given correlation or covariance matrix of rank n with matrix of lower rank (p). \eqn{_nR_n \approx _{n}F_{kk}F_n'+ U^2}{nRn = nFk kFn' + U2} where k is much less than n. For principal components, the item uniqueness is assumed to be zero and all elements of the correlation or covariance matrix are fitted. That is, \eqn{_nR_n \approx _{n}F_{kk}F_n'}{nRn = nFk kFn' } The primary empirical difference between a components versus a factor model is the treatment of the variances for each item. Philosophically, components are weighted composites of observed variables while in the factor model, variables are weighted composites of the factors. As the number of items increases, the difference between the two models gets smaller. Factor loadings are the asymptotic component loadings as the number of items gets larger. For a n x n correlation matrix, the n principal components completely reproduce the correlation matrix. However, if just the first k principal components are extracted, this is the best k dimensional approximation of the matrix. It is important to recognize that rotated principal components are not principal components (the axes associated with the eigen value decomposition) but are merely components. To point this out, unrotated principal components are labelled as PCi, while rotated PCs are now labeled as RCi (for rotated components) and obliquely transformed components as TCi (for transformed components). (Thanks to Ulrike Gromping for this suggestion.) Rotations and transformations are either part of psych (Promax and cluster), of base R (varimax), or of GPArotation (simplimax, quartimax, oblimin, etc.). Of the various rotation/transformation options, varimax, Varimax, quartimax, bentlerT, geominT, and bifactor do orthogonal rotations. Promax transforms obliquely with a target matix equal to the varimax solution. oblimin, quartimin, simplimax, bentlerQ, geominQ and biquartimin are oblique transformations. Most of these are just calls to the GPArotation package. The ``cluster'' option does a targeted rotation to a structure defined by the cluster representation of a varimax solution. With the optional "keys" parameter, the "target" option will rotate to a target supplied as a keys matrix. (See \code{\link{target.rot}}.) The rotation matrix (rot.mat) is returned from all of these options. This is the inverse of the Th (theta?) object returned by the GPArotation package. The correlations of the factors may be found by \eqn{\Phi = \theta' \theta}{Phi = Th' Th} Some of the statistics reported are more appropriate for (maximum likelihood) factor analysis rather than principal components analysis, and are reported to allow comparisons with these other models. Although for items, it is typical to find component scores by scoring the salient items (using, e.g., \code{\link{scoreItems}}) component scores are found by regression where the regression weights are \eqn{R^{-1} \lambda}{R^(-1) lambda} where \eqn{\lambda}{lambda} is the matrix of component loadings. The regression approach is done to be parallel with the factor analysis function \code{\link{fa}}. The regression weights are found from the inverse of the correlation matrix times the component loadings. This has the result that the component scores are standard scores (mean=0, sd = 1) of the standardized input. A comparison to the scores from \code{\link{princomp}} shows this difference. princomp does not, by default, standardize the data matrix, nor are the components themselves standardized. The regression weights are found from the Structure matrix, not the Pattern matrix. If the scores are found with the covar option = TRUE, then the scores are not standardized but are just mean centered. Jolliffe (2002) discusses why the interpretation of rotated components is complicated. Rencher (1992) discourages the use of rotated components. The approach used here is consistent with the factor analytic tradition. The correlations of the items with the component scores closely matches (as it should) the component loadings (as reported in the structure matrix). The output from the print.psych function displays the component loadings (from the pattern matrix), the h2 (communalities) the u2 (the uniquenesses), com (the complexity of the component loadings for that variable (see below). In the case of an orthogonal solution, h2 is merely the row sum of the squared component loadings. But for an oblique solution, it is the row sum of the (squared) orthogonal component loadings (remember, that rotations or transformations do not change the communality). This information is returned (invisibly) from the print function as the object Vaccounted. } \value{ \item{values}{Eigen Values of all components -- useful for a scree plot} \item{rotation}{which rotation was requested?} \item{n.obs}{number of observations specified or found} \item{communality}{Communality estimates for each item. These are merely the sum of squared factor loadings for that item.} \item{complexity}{Hoffman's index of complexity for each item. This is just \eqn{\frac{(\Sigma a_i^2)^2}{\Sigma a_i^4}}{{(\Sigma a_i^2)^2}/{\Sigma a_i^4}} where a_i is the factor loading on the ith factor. From Hofmann (1978), MBR. See also Pettersson and Turkheimer (2010).} \item{loadings }{A standard loading matrix of class ``loadings"} \item{fit }{Fit of the model to the correlation matrix } \item{fit.off}{how well are the off diagonal elements reproduced?} \item{residual }{Residual matrix -- if requested} \item{dof}{Degrees of Freedom for this model. This is the number of observed correlations minus the number of independent parameters (number of items * number of factors - nf*(nf-1)/2. That is, dof = niI * (ni-1)/2 - ni * nf + nf*(nf-1)/2.} \item{objective}{value of the function that is minimized by maximum likelihood procedures. This is reported for comparison purposes and as a way to estimate chi square goodness of fit. The objective function is \cr \eqn{f = (trace ((FF'+U2)^{-1} R) - log(|(FF'+U2)^{-1} R|) - n.items}{log(trace ((FF'+U2)^{-1} R) - log(|(FF'+U2)^-1 R|) - n.items}. Because components do not minimize the off diagonal, this fit will be not as good as for factor analysis. It is included merely for comparison purposes.} \item{STATISTIC}{If the number of observations is specified or found, this is a chi square based upon the objective function, f. Using the formula from \code{\link{factanal}}: \cr \eqn{\chi^2 = (n.obs - 1 - (2 * p + 5)/6 - (2 * factors)/3)) * f }{chi^2 = (n.obs - 1 - (2 * p + 5)/6 - (2 * factors)/3)) * f } } \item{PVAL}{If n.obs > 0, then what is the probability of observing a chisquare this large or larger?} \item{Phi}{If oblique rotations (using oblimin from the GPArotation package) are requested, what is the interfactor correlation.} \item{scores}{If scores=TRUE, then estimates of the factor scores are reported } \item{weights}{The beta weights to find the principal components from the data} \item{R2}{The multiple R square between the factors and factor score estimates, if they were to be found. (From Grice, 2001) For components, these are of course 1.0.} \item{valid}{The correlations of the component score estimates with the components, if they were to be found and unit weights were used. (So called course coding).} \item{r.scores}{The correlation of the component scores. (Since components are just weighted linear sums of the items, this is the same as Phi).} \item{rot.mat}{The rotation matrix used to produce the rotated component loadings. } } \note{By default, the accuracy of the varimax rotation function seems to be less than the Varimax function. This can be enhanced by specifying eps=1e-14 in the call to principal if using varimax rotation. Furthermore, note that Varimax by default does not apply the Kaiser normalization, but varimax does. Gottfried Helms compared these two rotations with those produced by SPSS and found identical values if using the appropriate options. (See the last two examples.) The ability to use different kinds of correlations was added in version 1.9.12.31 to be compatible with the options in fa. } \author{ William Revelle} \references{ Grice, James W. (2001), Computing and evaluating factor scores. Psychological Methods, 6, 430-450 Jolliffe, I. (2002) Principal Component Analysis (2nd ed). Springer. Rencher, A. C. (1992) Interpretation of Canonical Discriminant Functions, Canonical Variates, and Principal Components, the American Statistician, (46) 217-225. Revelle, W. An introduction to psychometric theory with applications in R (in prep) Springer. Draft chapters available at \url{https://personality-project.org/r/book/} } \seealso{\code{\link{VSS}} (to test for the number of components or factors to extract), \code{\link{VSS.scree}} and \code{\link{fa.parallel}} to show a scree plot and compare it with random resamplings of the data), \code{\link{factor2cluster}} (for course coding keys), \code{\link{fa}} (for factor analysis), \code{\link{factor.congruence}} (to compare solutions), \code{\link{predict.psych}} to find factor/component scores for a new data set based upon the weights from an original data set. } \examples{ #Four principal components of the Harman 24 variable problem #compare to a four factor principal axes solution using factor.congruence pc <- principal(Harman74.cor$cov,4,rotate="varimax") mr <- fa(Harman74.cor$cov,4,rotate="varimax") #minres factor analysis pa <- fa(Harman74.cor$cov,4,rotate="varimax",fm="pa") # principal axis factor analysis round(factor.congruence(list(pc,mr,pa)),2) pc2 <- principal(Harman.5,2,rotate="varimax") pc2 round(cor(Harman.5,pc2$scores),2) #compare these correlations to the loadings #now do it for unstandardized scores, and transform obliquely pc2o <- principal(Harman.5,2,rotate="promax",covar=TRUE) pc2o round(cov(Harman.5,pc2o$scores),2) pc2o$Structure #this matches the covariances with the scores biplot(pc2,main="Biplot of the Harman.5 socio-economic variables",labels=paste0(1:12)) #For comparison with SPSS (contributed by Gottfried Helms) pc2v <- principal(iris[1:4],2,rotate="varimax",normalize=FALSE,eps=1e-14) print(pc2v,digits=7) pc2V <- principal(iris[1:4],2,rotate="Varimax",eps=1e-7) p <- print(pc2V,digits=7) round(p$Vaccounted,2) # the amount of variance accounted for is returned as an object of print } \keyword{ multivariate }% at least one, from doc/KEYWORDS \keyword{ models }% __ONLY ONE__ keyword per line psych/man/cohen.d.Rd0000644000176200001440000002572514360320267013776 0ustar liggesusers\name{cohen.d} \alias{cohen.d} \alias{d.robust} \alias{cohen.d.ci} \alias{d.ci} \alias{cohen.d.by} \alias{d2r} \alias{r2d} \alias{d2t} \alias{t2d} \alias{m2t} \alias{m2d} \alias{d2OVL} \alias{d2OVL2} \alias{d2CL} \alias{d2U3} \alias{cd.validity} \title{Find Cohen d and confidence intervals} \description{ Given a data.frame or matrix, find the standardized mean difference (Cohen's d) and confidence intervals for each variable depending upon a grouping variable. Convert the d statistic to the r equivalent, report the student's t statistic and associated p values, and return statistics for both values of the grouping variable. The Mahalanobis distance between the centroids of the two groups in the space defined by all the variables ia also found. Confidence intervals for Cohen d for one group (difference from 0) may also be found. Several measures of the distributional overlap (e.g. OVL, OVL2, etc.) are available. } \usage{ cohen.d(x, group,alpha=.05,std=TRUE,sort=NULL,dictionary=NULL,MD=TRUE,data=NULL) d.robust(x,group,trim=.2) cohen.d.ci(d,n=NULL,n2=NULL,n1=NULL,alpha=.05) d.ci(d,n=NULL,n2=NULL,n1=NULL,alpha=.05) cohen.d.by(x,group,group2,alpha=.05,MD=TRUE) d2r(d) r2d(rho) d2t(d,n=NULL,n2=NULL,n1=NULL) t2d(t,n=NULL,n2=NULL,n1=NULL) m2t(m1,m2,s1,s2,n1=NULL,n2=NULL,n=NULL,pooled=TRUE) #returns d invisibily m2d(m1,m2,s1,s2,n1=NULL,n2=NULL,n=NULL,pooled=TRUE) d2OVL(d) #Percent overlap for 1 distribtion d2OVL2(d) #Percent overlap joint distribution d2CL(d) #Common language effect size d2U3(d) #Proportion in higher group exceedding median of lower group cd.validity(d, keys, abs=TRUE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{A data frame or matrix (can be specified in formula mode) } \item{group}{Some dichotomous grouping variable (may be specified using formula input (see example))} \item{group2}{Apply cohen.d for each of the subgroups defined by group2 (may be specified by formula as well)} \item{data}{If using formula mode and specifying a particular variable (see example)} \item{d}{An effect size} \item{keys}{A list of scoring keys (similar to scoreItems)} \item{abs}{When finding average cd validities, should we take absolute values (TRUE)} \item{trim}{The amount of trimming used in finding the means and sds in d.robust} \item{n}{Total sample size (of groups 1 and 2)} \item{n1}{Sample size of group 1 (if only one group)} \item{n2}{Sample size of group 2 } \item{pooled}{Pool the two variances} \item{t}{Student's t statistic} \item{alpha}{1-alpha is the width of the confidence interval} \item{std}{Find the correlation rather covariance matrix} \item{rho}{A correlation to be converted to an effect size} \item{m1}{Mean of group 1} \item{m2}{Mean of group 2} \item{s1}{Standard deviation of group 1} \item{s2}{Standard deviation of group 2} \item{sort}{Should we sort (and if so, in which direction), the results of cohen.d? Directions are "decreasing" or "increasing". If TRUE, sorts in a decreasing order.} \item{dictionary}{What are the items being described?} \item{MD}{Find Mahalanobis distance in cohen.d.} } \details{ There are many ways of reporting how two groups differ. Cohen's d statistic is just the differences of means expressed in terms of the pooled within group standard deviation. This is insensitive to sample size. r is the a universal measure of effect size that is a simple function of d, but is bounded -1 to 1. The t statistic is merely d * sqrt(n)/2 and thus reflects sample size. \deqn{d = \frac{M2 - M1}{Sp}}{(M2- M1)/Sp} where Sp is the pooled standard deviation. \deqn{Sp = \sqrt{\frac{(n1-1)*s1^2 + (n2-1)* s2^2}{N} } }{\sqrt{((n1-1)*s1^2 + (n2-1)* s2^2)/{N} } } Cohens d uses N as the divisor for the pooled sums of squares. Hedges g uses N-2. Confidence intervals for Cohen's d are found by converting the d to a t, finding the confidence intervals for t, and then converting those back to ds. This take advantage of the uniroot function and the non-centrality parameter of the t distribution. The results of \code{\link{cohen.d}} may be displayed using the \code{\link{error.dots}} function. This will include the labels provided in the dictionary. In the case of finding the confidence interval (using \code{\link{cohen.d.ci}} for a comparison against 0 (the one sample case), specify n1. This will yield a d = t/sqrt(n1) whereas in the case of the difference between two samples, d = 2*t/sqrt(n) (for equal sample sizes n = n1+ n2) or d = t/sqrt(1/n1 + 1/n2) for the case of unequal sample sizes. Since we find d and then convert this to t, using d2t, the question is how to pool the variances. Until 7/14/21 I was using the total n to estimate the t and thus the p values. In response to a query (see news), I switched to using the actual sample size ns (n1 and n2) and then finding t based upon the hedges g value. This produces t values as reported by t.test with the var.equal = TRUE option. It is probably useful to comment that the various confidence intervals reported are based upon normal theory and should be interpreted cautiously. \code{\link{cohen.d.by}} will find Cohen's d for groups for each subset of the data defined by group2. The summary of the output produces a simplified listing of the d values for each variable for each group. May be called directly from cohen.d by using formula input and specifying two grouping variables. \code{\link{d.robust}} follows Algina et al. 2005) to find trimmed means (trim =.2) and Winsorize variances (trim =.2). Supposedly, this provides a more robust estimate of effect sizes. \code{\link{m2t}} reports Student's t.test for two groups given their means, standard deviations, and sample size. This is convenient when checking statistics where those estimates are provided, but the raw data are not available. By default, it gives the pooled estimate of variance, but if pooled is FALSE, it applies Welch's correction. The Mahalanobis Distance combines the individual ds and weight them by their unique contribution: \eqn{D = \sqrt{d' R^{-1}d}}{D = \sqrt{d' R^{-1}d}}. By default, \code{\link{cohen.d}} will find the Mahalanobis distance between the two groups (if there is more than one DV.) This requires finding the correlation of all of the DVs and can fail if that matrix is not invertible because some pairs do not exist. Thus, setting MD=FALSE will prevent the Mahalanobis calculation. Marco del Giudice (2019) has a very helpful paper discussing how to interpret d and Md in terms of various overlap coefficients. These may be found by the use of the \code{\link{d2OVL}} (percent overlap for 1 distribution), \code{\link{d2OVL2}} percent overlap of joint distributions, \code{\link{d2CL}} (the common language effect size), and \code{\link{d2U3}} (proportion in higher group exceeding median of the lower group). \deqn{OVL = 2\phi(-d/2)}{OVL = 2 \phi(-d/2)} is the proportion of overlap (and gets smaller the larger the d). where Phi is the cumulative density function of the normal distribution. \deqn{OVL_2 = \frac{OVL}{2-OVL}}{OVL_2 = OVL/(2-OVL)} The proportion of individuals in one group above the median of the other group is U3 \deqn{U_3 = \phi_d}{U_3 = \phi(d)}. The Common Language Effect size \deqn{CL = \phi (d *\sqrt{2})}{CL = \phi(d * \sqrt(2) )} These last two get larger with (abs (d)). For graphic displays of Cohen's d and Mahalanobis D, see the \code{\link{scatterHist}} examples, or the example from the psychTools::GERAS data set. } \value{ \item{d}{Cohen's d statistic, including the upper and lower confidence levels} \item{hedges.g}{Hedge's g statistic, including the upper and lower confidence levels} \item{M.dist}{Mahalanobis distance between the two groups} \item{t}{Student's t statistic} \item{r}{The point biserial r equivalent of d} \item{n}{sample size used for each analysis} \item{p}{The probability of abs(t)>0} \item{descriptive}{The descriptive statistics for each group. This is useful to show means and then the d values.} \item{OVL}{etc. some of the measures of overlap discussed by DelGiudice, 2009} } \note{Cohen and Hedges differ in they way they calculate the pooled within group standard deviation. I find the treatment by McGrath and Meyer to be most helpful in understanding the differences. } \references{Cohen, Jackob (1988) Statistical Power Analysis for the Behavioral Sciences. 2nd Edition, Lawrence Erlbaum Associates. Algina, James and Keselman, H. J. and Penfield, Randall D. (2005) An Alternative to Cohen's Standardized Mean Difference Effect Size: A Robust Parameter and Confidence Interval in the Two Independent Groups Case. Psychological Methods. 10, 317-328. Goulet-Pelletier, Jean-Christophe and Cousineau, Denis.(2018) A review of effect sizes and their confidence intervals, Part I: The Cohen's d family. The Quantitative Methods for Psychology, 14, 242-265. Marco Del Giudice (2019) Measuring Sex Differences and Similarities, (in VanderLaan and Wong (ed. ) Gender and sexuality development: Contemporary theory and research.) McGrath, Robert E and Meyer, Gregory J. (2006) When effect sizes disagree: the case of r and d. Psychological methods, 11, 4, 386-401. } \author{ William Revelle } \seealso{ \code{\link{describeBy}}, \code{\link{describe}} \code{\link{error.dots}} to display the results. \code{\link{scatterHist}} to show d and MD for pairs of variables. (See in particular the use of \code{\link{scatterHist}} on psychTools::GERAS daa set.) } \examples{ cohen.d(sat.act,"gender") #robust version round(d.robust(sat.act,"gender")$robust.d,2) #formula input is nicer cohen.d(sat.act ~ gender) #formula input version #if we want to report the group means, we grab the data in descriptive cd <- cohen.d(sat.act ~ gender) cd.df <- data.frame(d = cd$cohen.d[,"effect"], male = cd$descriptive$mean[1,cd$order[-1]], female = cd$descriptive$mean[2, cd$order[-1]]) #report cohen.d by another group cd <- cohen.d.by(sat.act,"gender","education") cohen.d(SATV + SATQ ~ gender, data=sat.act) #just choose two variables summary(cd) #summarize the output #formula version combines these functions cd <- cohen.d(sat.act ~ gender + education) #find d by gender for each level of education summary(cd) #now show several examples of confidence intervals #one group (d vs 0) #consider the t from the cushny data set t2d( -4.0621,n1=10) d.ci(-1.284549,n1=10) #the confidence interval of the effect of drug on sleep #two groups d.ci(.62,n=64) #equal group size d.ci(.62,n1=35,n2=29) #unequal group size #several examples of d and t from data m2d(52.58,-70.65,49.9,47.5) #Terman and Miles 1936 #graphically show the various overlap statistics curve(d2OVL2(x),0,3,xlab="d",ylab="",lty="dashed", main="Four representations of effect size (d) ") curve(d2OVL(x),0,3,xlab="d",add=TRUE,) curve(d2CL(x),0,3,add=TRUE) curve(d2U3(x), add=TRUE,lty="dotted") text(1,.37,"OVL2") text(2,.37,"OVL") text(1,.88,"U3") text(2, .88,"CL") } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{ models }% at least one, from doc/KEYWORDS \keyword{ multivariate }% __ONLY ONE__ keyword per line psych/man/tal_or.Rd0000644000176200001440000000444313314207176013733 0ustar liggesusers\name{Tal_Or} \alias{Tal_Or} \alias{Tal.Or} \alias{pmi} \alias{tctg} \docType{data} \title{ Data set testing causal direction in presumed media influence} \description{ Nurit Tal-Or, Jonanathan Cohen, Yariv Tasfati, and Albert Gunther (2010) examined the presumed effect of media on other people and change in attitudes. This data set is from Study 2, and examined the effect of presumed influence of the media upon subsequent actions. It is used as an example of mediation by Hayes (2013) and for the mediate function. } \usage{data("Tal.Or")} \format{ A data frame with 123 observations on the following 6 variables. \describe{ \item{\code{cond}}{Experimental Condition: 0 low media importance, 1 high media importance } \item{\code{pmi}}{Presumed media influence (based upon the mean of two items} \item{\code{import}}{Importance of the issue } \item{\code{reaction}}{Subjects rated agreement about possible reactions to the story (mean of 4 items).} \item{\code{gender}}{1 = male, 2 = female } \item{\code{age}}{a numeric vector} } } \details{ Tal-Or et al. (2010) examined the presumed effect of the media in two experimental studies. These data are from study 2. `... perceptions regarding the influence of a news story about an expected shortage in sugar were manipulated indirectly, by manipulating the perceived exposure to the news story, and behavioral intentions resulting from the story were consequently measured." (p 801). } \source{ The data were downloaded from the webpages of Andrew Hayes (https://www.afhayes.com/public/hayes2018data.zip) supporting the first and second edition of his book. The name of the original data set was pmi. (Gender was recoded to reflect the number of X chromosomes). The original data are from Nurit Tal-Or, Jonathan Cohen, Yariv Tsfati, and Albert C. Gunther and are used with their kind permission. } \references{ Nurit Tal-Or, Jonathan Cohen, Yariv Tsfati and Albert C. Gunther (2010), Testing Causal Direction in the Influence of Presumed Media Influence, Communication Research, 37, 801-824. Hayes, Andrew F. (2013) Introduction to mediation, moderation, and conditional process analysis: A regression-based approach. Guilford Press. } \examples{ data(Tal.Or) mediate(reaction ~ cond + (pmi), data =Tal.Or,n.iter=50) } \keyword{datasets} psych/man/sim.Rd0000644000176200001440000004163214533707175013254 0ustar liggesusers\name{sim} \alias{sim} \alias{sim.simplex} \alias{sim.minor} \title{Functions to simulate psychological/psychometric data.} \description{A number of functions in the psych package will generate simulated data with particular structures. The three documented here are for basic factor analysis simulations. These functions include \code{\link{sim}} for a factor simplex, and \code{\link{sim.simplex}} for a data simplex, and \code{\link{sim.minor}} to simulate major and minor factors. Other simulations aimed at special item structures include \code{\link{sim.circ}} for a circumplex structure, \code{\link{sim.congeneric}} for a one factor factor congeneric model, \code{\link{sim.dichot}} to simulate dichotomous items, \code{\link{sim.hierarchical}} to create a hierarchical factor model, \code{\link{sim.item}} a more general item simulation are documented is separate help files. They are listed here to help find them. \code{\link{simCor}} to generate sample correlation matrices from a target matrix. \code{\link{sim.omega}} to test various examples of omega, \code{\link{sim.parallel}} to compare the efficiency of various ways of deterimining the number of factors. See the help pages for some more simulation functions here: \code{\link{sim.rasch}} to create simulated rasch data, \code{\link{sim.irt}} to create general 1 to 4 parameter IRT data by calling \code{\link{sim.npl}} 1 to 4 parameter logistic IRT or \code{\link{sim.npn}} 1 to 4 paramater normal IRT, \code{\link{sim.poly}} to create polytomous ideas by calling \code{\link{sim.poly.npn}} 1-4 parameter polytomous normal theory items or \code{\link{sim.poly.npl}} 1-4 parameter polytomous logistic items, and \code{\link{sim.poly.ideal}} which creates data following an ideal point or unfolding model by calling \code{\link{sim.poly.ideal.npn}} 1-4 parameter polytomous normal theory ideal point model or \code{\link{sim.poly.ideal.npl}} 1-4 parameter polytomous logistic ideal point model. \code{\link{sim.structural}} a general simulation of structural models, and \code{\link{sim.anova}} for ANOVA and lm simulations, and \code{\link{sim.VSS}}. Some of these functions are separately documented and are listed here for ease of the help function. See each function for more detailed help. } \usage{ sim(fx=NULL,Phi=NULL,fy=NULL,alpha=.8,lambda = 0,n=0,mu=NULL,raw=TRUE,threshold=NULL, items = FALSE, low=-2,high=2,cat=5) sim.simplex(nvar =12, alpha=.8,lambda=0,beta=1,mu=NULL, n=0,threshold=NULL) sim.minor(nvar=12,nfact=3,n=0,g=NULL,fbig=NULL,fsmall = c(-.2,.2),n.small=NULL, Phi=NULL, bipolar=TRUE, threshold=NULL) } \arguments{ \item{fx}{The measurement model for x. If NULL, a 4 factor model is generated} \item{Phi}{The structure matrix of the latent variables} \item{fy}{The measurement model for y} \item{mu}{The means structure for the fx factors, defaults to 0,1,... 3. Set to 0 if using thresholds.} \item{n}{ Number of cases to simulate. If n=0 or NULL, the population matrix is returned.} \item{raw}{if raw=TRUE, raw data are returned as well.} \item{threshold}{If raw=TRUE (n >0) and threshold is not NULL, convert the data to binary values, cut at threshold.} \item{items}{TRUE if simulating items, FALSE if simulating scales} \item{low}{Restrict the item difficulties to range from low to high} \item{high}{Restrict the item difficulties to range from low to high} \item{cat}{Number of categories when creating binary (2) or polytomous items} \item{nvar}{Number of variables for a simplex structure} \item{nfact}{Number of large factors to simulate in sim.minor,number of group factors in sim.general,sim.omega} \item{g}{General factor correlations in sim.general and general factor loadings in sim.omega and sim.minor} \item{alpha}{the base correlation for an autoregressive simplex} \item{lambda}{the trait component of a State Trait Autoregressive Simplex} \item{beta}{Test reliability of a STARS simplex} \item{fbig}{Factor loadings for the main factors. Default is a simple structure with loadings sampled from (.8,.6) for nvar/nfact variables and 0 for the remaining. If fbig is specified, then each factor has loadings sampled from it.} \item{bipolar}{if TRUE, then positive and negative loadings are generated from fbig} \item{fsmall}{nvar/2 small factors are generated with loadings sampled from e.g. (-.2,0,.2)} \item{n.small}{If NULL, then generate nvar/2 small factors, else generate n.small small factors} } \details{Simulation of data structures is a very useful tool in psychometric research and teaching. By knowing ``truth" it is possible to see how well various algorithms can capture it. For a much longer discussion of the use of simulation in psychometrics, see the accompany vignettes. The simulations documented here are the core set of functions. Others are documented in other help files. \code{\link{sim}} simulates one (fx) or two (fx and fy) factor structures where both fx and fy respresent factor loadings of variables. The use of fy is particularly appropriate for simulating sem models. This is better documentated in the help for \code{\link{sim.structural}}. The code for \code{\link{sim}} has been enhanced (10/21/23) to simulate items as well as continuous variables. This matches the code in \code{\link{sim.structural}}. Perhaps the easist simulation to understand is just \code{\link{sim}}. A factor model (fx) and perhaps fy with intercorrelations between the two factor sets of Phi. This will produce a correlation matrix R = fx' phi fy. Factors can differ in their mean values by specifying mu. With the addition of the treshold parameter, \code{\link{sim}} will generate dichotomous items where values < threshold become 0, greater become 1. If threshold is a vector less than nvar, then values are sampled from threshold. If the length of threshold = nvar, then each item is dichotomized at threshold. With the adddition of the items=TRUE option, continuous observed variables are transformed into discrete categories. The default values for \code{\link{sim.structure}} is to generate a 4 factor, 12 variable data set with a simplex structure between the factors. This, and the simplex of items (\code{\link{sim.simplex}}) can also be converted in a STARS model with an autoregressive component (alpha) and a stable trait component (lambda). Two data structures that are particular challenges to exploratory factor analysis are the simplex structure and the presence of minor factors. Simplex structures \code{\link{sim.simplex}} will typically occur in developmental or learning contexts and have a correlation structure of r between adjacent variables and r^n for variables n apart. Although just one latent variable (r) needs to be estimated, the structure will have nvar-1 factors. An alternative version of the simplex is the State-Trait-Auto Regressive Structure (STARS) which has both a simplex state structure, with autoregressive path alpha and a trait structure with path lambda. This simulated in \code{\link{sim.simplex}} by specifying a non-zero lambda value. Many simulations of factor structures assume that except for the major factors, all residuals are normally distributed around 0. An alternative, and perhaps more realistic situation, is that the there are a few major (big) factors and many minor (small) factors. For a nice discussion of this situation, see Maccallum, Browne and Cai (2007). The challenge is thus to identify the major factors. \code{\link{sim.minor}} generates such structures. The structures generated can be thought of as having a major factor structure with some small correlated residuals. To make these simulations complete, the possibility of a general factor is considered. For simplicity, sim.minor allows one to specify a set of loadings to be sampled from for g, fmajor and fminor. Alternatively, it is possible to specify the complete factor matrix. By default, the nvar/2 minor factors are given loadings of +/- .2. If the major factors are very big, this leads to impossible models. a warning to try again is issued. Another structure worth considering is direct modeling of a general factor with several group factors. This is done using \code{\link{sim.general}}. Although coefficient \eqn{\omega}{\omega} is a very useful indicator of the general factor saturation of a unifactorial test (one with perhaps several sub factors), it has problems with the case of multiple, independent factors. In this situation, one of the factors is labelled as ``general'' and the omega estimate is too large. This situation may be explored using the \code{\link{sim.omega}} function with general left as NULL. If there is a general factor, then results from \code{\link{sim.omega}} suggests that omega estimated either from EFA or from SEM does a pretty good job of identifying it but that the EFA approach using Schmid-Leiman transformation is somewhat more robust than the SEM approach. The four irt simulations, \code{\link{sim.rasch}}, \code{\link{sim.irt}}, \code{\link{sim.npl}} and \code{\link{sim.npn}} simulate dichotomous items following the Item Response model. \code{\link{sim.irt}} just calls either \code{\link{sim.npl}} (for logistic models) or \code{\link{sim.npn}} (for normal models) depending upon the specification of the model. The logistic model is \deqn{P(i,j) = \gamma + \frac{\zeta-\gamma}{1+ e^{\alpha(\delta-\theta)}}}{P(i,j) = \gamma + (\zeta-\gamma)/(1+ exp(\alpha(\delta-\theta)))} where \eqn{\gamma} is the lower asymptote or guesssing parameter, \eqn{\zeta} is the upper asymptote (normally 1), \eqn{\alpha} is item discrimination and \eqn{\delta} is item difficulty. For the 1 Paramater Logistic (Rasch) model, gamma=0, zeta=1, alpha=1 and item difficulty is the only free parameter to specify. For the 2PL and 2PN models, a = \eqn{\alpha} and d = \eqn{\delta} are specified. \cr For the 3PL or 3PN models, items also differ in their guessing parameter c =\eqn{\gamma}. \cr For the 4PL and 4PN models, the upper asymptote, z= \eqn{\zeta} is also specified. \cr (Graphics of these may be seen in the demonstrations for the \code{\link{logistic}} function.) The normal model (irt.npn) calculates the probability using pnorm instead of the logistic function used in irt.npl, but the meaning of the parameters are otherwise the same. With the a = \eqn{\alpha} parameter = 1.702 in the logistic model the two models are practically identical. In parallel to the dichotomous IRT simulations are the poly versions which simulate polytomous item models. They have the additional parameter of how many categories to simulate. In addition, the \code{\link{sim.poly.ideal}} functions will simulate an ideal point or unfolding model in which the response probability varies by the distance from each subject's ideal point. Some have claimed that this is a more appropriate model of the responses to personality questionnaires. It will lead to simplex like structures which may be fit by a two factor model. The middle items form one factor, the extreme a bipolar factor. By default, the theta parameter is created in each function as normally distributed with mean mu=0 and sd=1. In the case where you want to specify the theta to be equivalent from another simulation or fixed for a particular experimental condition, either take the theta object from the output of a previous simulation, or create it using whatever properties are desired. The previous functions all assume one latent trait. Alternatively, we can simulate dichotomous or polytomous items with a particular structure using the sim.poly.mat function. This takes as input the population correlation matrix, the population marginals, and the sample size. It returns categorical items with the specified structure. Other simulation functions in psych that are documented separately includ: \code{\link{sim.structure}} A function to combine a measurement and structural model into one data matrix. Useful for understanding structural equation models. Combined with \code{\link{structure.diagram}} to see the proposed structure. \code{\link{sim.congeneric}} A function to create congeneric items/tests for demonstrating classical test theory. This is just a special case of sim.structure. \code{\link{sim.hierarchical}} A function to create data with a hierarchical (bifactor) structure. \code{\link{sim.item}} A function to create items that either have a simple structure or a circumplex structure. \code{\link{sim.circ}} Create data with a circumplex structure. \code{\link{sim.dichot}} Create dichotomous item data with a simple or circumplex structure. \code{\link{sim.minor}} Create a factor structure for nvar variables defined by nfact major factors and nvar/2 ``minor" factors for n observations. Although the standard factor model assumes that K major factors (K << nvar) will account for the correlations among the variables \deqn{R = FF' + U^2} where R is of rank P and F is a P x K matrix of factor coefficients and U is a diagonal matrix of uniquenesses. However, in many cases, particularly when working with items, there are many small factors (sometimes referred to as correlated residuals) that need to be considered as well. This leads to a data structure such that \deqn{R = FF' + MM' + U^2} where R is a P x P matrix of correlations, F is a P x K factor loading matrix, M is a P x P/2 matrix of minor factor loadings, and U is a diagonal matrix (P x P) of uniquenesses. Such a correlation matrix will have a poor \eqn{\chi^2} value in terms of goodness of fit if just the K factors are extracted, even though for all intents and purposes, it is well fit. \code{\link{sim.minor}} will generate such data sets with big factors with loadings of .6 to .8 and small factors with loadings of -.2 to .2. These may both be adjusted. \code{\link{sim.parallel}} Create a number of simulated data sets using \code{\link{sim.minor}} to show how parallel analysis works. The general observation is that with the presence of minor factors, parallel analysis is probably best done with component eigen values rather than factor eigen values, even when using the factor model. \code{\link{sim.anova}} Simulate a 3 way balanced ANOVA or linear model, with or without repeated measures. Useful for teaching research methods and generating teaching examples. \code{\link{sim.multilevel}} To understand some of the basic concepts of multilevel modeling, it is useful to create multilevel structures. The correlations of aggregated data is sometimes called an 'ecological correlation'. That group level and individual level correlations are independent makes such inferences problematic. This simulation allows for demonstrations that correlations within groups do not imply, nor are implied by, correlations between group means. } \value{ \item{model}{The model based correlation matrix (FF' )} \item{reliability}{Model based reliability} \item{r}{Observed (sample based) correlation matrix} \item{theta}{True scores generating the data} \item{N}{Sample size} \item{fload}{The factor model (F + S) generating the model} } \note{The theta values are "true scores" and may be compared to the factor analytic based factor score estimates of the data. This is a useful way to understand factor indeterminancy.} \references{ MacCallum, Robert C. and Browne, Michael W. and Cai, Li (2007) Factor analysis models as approximations. Factor analysis at 100: Historical developments and future directions. (Cudeck and MacCallum Eds). Lawrence Erlbaum Associates Publishers. Revelle, W. (in preparation) An Introduction to Psychometric Theory with applications in R. Springer. at \url{https://personality-project.org/r/book/} } \author{William Revelle} \seealso{ See above} \examples{ simplex <- sim.simplex() #create the default simplex structure lowerMat(simplex) #the correlation matrix #create a congeneric matrix congeneric <- sim.congeneric() lowerMat(congeneric) R <- sim.hierarchical() lowerMat(R) #now simulate categorical items with the hierarchical factor structure. #Let the items be dichotomous with varying item difficulties. marginals = matrix(c(seq(.1,.9,.1),seq(.9,.1,-.1)),byrow=TRUE,nrow=2) X <- sim.poly.mat(R=R,m=marginals,n=1000) lowerCor(X) #show the raw correlations #lowerMat(tetrachoric(X)$rho) # show the tetrachoric correlations (not run) #generate a structure fx <- matrix(c(.9,.8,.7,rep(0,6),c(.8,.7,.6)),ncol=2) fy <- c(.6,.5,.4) Phi <- matrix(c(1,0,.5,0,1,.4,0,0,0),ncol=3) R <- sim.structure(fx,Phi,fy) cor.plot(R$model) #show it graphically simp <- sim.simplex() #show the simplex structure using cor.plot cor.plot(simp,colors=TRUE,main="A simplex structure") #Show a STARS model simp <- sim.simplex(alpha=.8,lambda=.4) #show the simplex structure using cor.plot cor.plot(simp,colors=TRUE,main="State Trait Auto Regressive Simplex" ) dichot.sim <- sim.irt() #simulate 5 dichotomous items poly.sim <- sim.poly(theta=dichot.sim$theta) #simulate 5 polytomous items that correlate #with the dichotomous items } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{multivariate} \keyword{datagen} psych/man/scatter.hist.Rd0000644000176200001440000001363414537106042015066 0ustar liggesusers\name{scatterHist} \alias{scatter.hist} \alias{scatterHist} \title{Draw a scatter plot with associated X and Y histograms, densities and correlation} \description{Draw a X Y scatter plot with associated X and Y histograms with estimated densities. Will also draw density plots by groups, as well as distribution ellipses by group. Partly a demonstration of the use of layout. Also includes lowess smooth or linear model slope, as well as correlation and Mahalanobis distances } \usage{ scatterHist(x,y=NULL,smooth=TRUE,ab=FALSE, correl=TRUE,data=NULL, density=TRUE,means=TRUE, ellipse=TRUE,digits=2,method="pearson",cex.cor=1,cex.point=1, title="Scatter plot + density", xlab=NULL,ylab=NULL,smoother=FALSE,nrpoints=0,xlab.hist=NULL,ylab.hist=NULL,grid=FALSE, xlim=NULL,ylim=NULL,x.breaks=11,y.breaks=11, x.space=0,y.space=0,freq=TRUE,x.axes=TRUE,y.axes=TRUE,size=c(1,2), col=c("blue","red","black"),legend=NULL,alpha=.5,pch=21, show.d=TRUE, x.arrow=NULL,y.arrow=NULL,d.arrow=FALSE,cex.arrow=1,...) scatter.hist(x,y=NULL,smooth=TRUE,ab=FALSE, correl=TRUE,data=NULL,density=TRUE, means=TRUE, ellipse=TRUE,digits=2,method="pearson",cex.cor=1,cex.point=1, title="Scatter plot + density", xlab=NULL,ylab=NULL,smoother=FALSE,nrpoints=0,xlab.hist=NULL,ylab.hist=NULL,grid=FALSE, xlim=NULL,ylim=NULL,x.breaks=11,y.breaks=11, x.space=0,y.space=0,freq=TRUE,x.axes=TRUE,y.axes=TRUE,size=c(1,2), col=c("blue","red","black"),legend=NULL,alpha=.5,pch=21, show.d=TRUE, x.arrow=NULL,y.arrow=NULL,d.arrow=FALSE,cex.arrow=1,...) } \arguments{ \item{x}{The X vector, or the first column of a data.frame or matrix. Can be specified using formula input. } \item{y}{The Y vector, of if X is a data.frame or matrix, the second column of X} \item{smooth}{if TRUE, then add a loess smooth to the plot} \item{ab}{if TRUE, then show the best fitting linear fit} \item{correl}{TRUE: Show the correlation} \item{data}{if using formula input, the data must be specified} \item{density}{TRUE: Show the estimated densities} \item{means}{TRUE: If TRUE, show the means for the distributions. } \item{ellipse}{TRUE: draw 1 and 2 sigma ellipses and smooth} \item{digits}{How many digits to use if showing the correlation} \item{method}{Which method to use for correlation ("pearson","spearman","kendall") defaults to "pearson"} \item{smoother}{if TRUE, use smoothScatter instead of plot. Nice for large N.} \item{nrpoints}{If using smoothScatter, show nrpoints as dots. Defaults to 0} \item{grid}{If TRUE, show a grid for the scatter plot.} \item{cex.cor}{Adjustment for the size of the correlation} \item{cex.point}{Adjustment for the size of the data points} \item{xlab}{Label for the x axis} \item{ylab}{Label for the y axis} \item{xlim}{Allow specification for limits of x axis, although this seems to just work for the scatter plots.} \item{ylim}{Allow specification for limits of y axis} \item{x.breaks}{Number of breaks to suggest to the x axis histogram.} \item{y.breaks}{Number of breaks to suggest to the y axis histogram.} \item{x.space}{space between bars} \item{y.space}{Space between y bars} \item{freq}{Show frequency counts, otherwise show density counts} \item{x.axes}{Show the x axis for the x histogram} \item{y.axes}{Show the y axis for the y histogram} \item{size}{The sizes of the ellipses (in sd units). Defaults to 1,2} \item{col}{Colors to use when showing groups} \item{alpha}{Amount of transparency in the density plots} \item{legend}{Where to put a legend c("topleft","topright","top","left","right")} \item{pch}{Base plot character (each group is one more)} \item{xlab.hist}{Not currently available} \item{ylab.hist}{Label for y axis histogram. Not currently available.} \item{title}{An optional title} \item{show.d}{If TRUE, show the distances between the groups} \item{d.arrow}{If TRUE, draw an arrow between the two centroids} \item{x.arrow}{optional lable for the arrow connecting the two groups for the x axis} \item{y.arrow}{optional lable for the arrow connecting the two groups for the y axis} \item{cex.arrow}{cex control for the label size of the arrows.} \item{\dots}{Other parameters for graphics} } \details{Just a straightforward application of layout and barplot, with some tricks taken from \code{\link{pairs.panels}}. The various options allow for correlation ellipses (1 and 2 sigma from the mean), lowess smooths, linear fits, density curves on the histograms, and the value of the correlation. ellipse = TRUE implies smooth = TRUE. The grid option provides a background grid to the scatterplot. If using grouping variables, will draw ellipses (defaults to 1 sd) around each centroid. This is useful when demonstrating Mahalanobis distances. Formula input allows specification of grouping variables as well. ) For plotting data for two groups, Mahalobnis differences between the groups may be shown by drawing an arrow between the two centroids. This is a bit messy and it is useful to use pch="." in this case. } \author{William Revelle} \note{ Originally adapted from Addicted to R example 78. Modified following some nice suggestions from Jared Smith. Substantial revisions in 2021 to allow for a clearer demonstration of group differences. } \seealso{\code{\link{pairs.panels}} for multiple plots, \code{\link{multi.hist}} for multiple histograms and \code{\link{histBy}} for single variables with multiple groups. Perhaps the best example is found in the psychTools::GERAS data set. } \examples{ data(sat.act) with(sat.act,scatterHist(SATV,SATQ)) scatterHist(SATV ~ SATQ,data=sat.act) #formula input #or for something a bit more splashy scatter.hist(sat.act[5:6],pch=(19+sat.act$gender),col=c("blue","red")[sat.act$gender],grid=TRUE) #better yet scatterHist(SATV ~ SATQ + gender,data=sat.act) #formula input with a grouping variable } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{ multivariate } \keyword{ hplot } psych/man/Schmid.Leiman.Rd0000644000176200001440000000540011310346435015054 0ustar liggesusers\name{Schmid} \Rdversion{1.1} \alias{Schmid} \alias{schmid.leiman} \alias{West} \alias{Chen} \docType{data} \title{12 variables created by Schmid and Leiman to show the Schmid-Leiman Transformation} \description{ John Schmid and John M. Leiman (1957) discuss how to transform a hierarchical factor structure to a bifactor structure. Schmid contains the example 12 x 12 correlation matrix. schmid.leiman is a 12 x 12 correlation matrix with communalities on the diagonal. This can be used to show the effect of correcting for attenuation. Two additional data sets are taken from Chen et al. (2006). } \usage{data(Schmid)} \details{ Two artificial correlation matrices from Schmid and Leiman (1957). One real and one artificial covariance matrices from Chen et al. (2006). \itemize{ \item Schmid: a 12 x 12 artificial correlation matrix created to show the Schmid-Leiman transformation. \item schmid.leiman: A 12 x 12 matrix with communalities on the diagonal. Treating this as a covariance matrix shows the 6 x 6 factor solution \item Chen: An 18 x 18 covariance matrix of health related quality of life items from Chen et al. (2006). Number of observations = 403. The first item is a measure of the quality of life. The remaining 17 items form four subfactors: The items are (a) Cognition subscale: ``Have difficulty reasoning and solving problems?" ``React slowly to things that were said or done?"; ``Become confused and start several actions at a time?" ``Forget where you put things or appointments?"; ``Have difficulty concentrating?" (b) Vitality subscale: ``Feel tired?" ``Have enough energy to do the things you want?" (R) ``Feel worn out?" ; ``Feel full of pep?" (R). (c) Mental health subscale: ``Feel calm and peaceful?"(R) ``Feel downhearted and blue?"; ``Feel very happy"(R) ; ``Feel very nervous?" ; ``Feel so down in the dumps nothing could cheer you up? (d) Disease worry subscale: ``Were you afraid because of your health?"; ``Were you frustrated about your health?"; ``Was your health a worry in your life?" . \item West: A 16 x 16 artificial covariance matrix from Chen et al. (2006). } } \source{ John Schmid Jr. and John. M. Leiman (1957), The development of hierarchical factor solutions.Psychometrika, 22, 83-90. F.F. Chen, S.G. West, and K.H. Sousa.(2006) A comparison of bifactor and second-order models of quality of life. Multivariate Behavioral Research, 41(2):189-225, 2006. } \references{ Y.-F. Yung, D.Thissen, and L.D. McLeod. (1999) On the relationship between the higher-order factor model and the hierarchical factor model. Psychometrika, 64(2):113-128, 1999. } \examples{ data(Schmid) cor.plot(Schmid,TRUE) print(fa(Schmid,6,rotate="oblimin"),cut=0) #shows an oblique solution round(cov2cor(schmid.leiman),2) cor.plot(cov2cor(West),TRUE) } \keyword{datasets} psych/man/cluster.loadings.Rd0000644000176200001440000000651713715125306015736 0ustar liggesusers\name{cluster.loadings} \alias{cluster.loadings} \title{ Find item by cluster correlations, corrected for overlap and reliability } \description{ Given a n x n correlation matrix and a n x c matrix of -1,0,1 cluster weights for those n items on c clusters, find the correlation of each item with each cluster. If the item is part of the cluster, correct for item overlap. Part of the \code{\link{ICLUST}} set of functions, but useful for many item analysis problems. } \usage{ cluster.loadings(keys, r.mat, correct = TRUE,SMC=TRUE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{keys}{ Cluster keys: a matrix of -1,0,1 cluster weights} \item{r.mat}{ A correlation matrix } \item{correct}{Correct for reliability} \item{SMC}{Use the squared multiple correlation as a communality estimate, otherwise use the greatest correlation for each variable} } \details{Given a set of items to be scored as (perhaps overlapping) clusters and the intercorrelation matrix of the items, find the clusters and then the correlations of each item with each cluster. Correct for item overlap by replacing the item variance with its average within cluster inter-item correlation. Although part of ICLUST, this may be used in any SAPA (\url{https://www.sapa-project.org/}) application where we are interested in item-whole correlations of items and composite scales. For information about SAPA see Revelle et al, 2010, 2016. For information about SAPA based measures of ability, see \url{https://icar-project.org}. These loadings are particularly interpretable when sorted by absolute magnitude for each cluster (see \code{\link{ICLUST.sort}}). } \value{ \item{loadings }{A matrix of item-cluster correlations (loadings)} \item{cor }{Correlation matrix of the clusters} \item{corrected }{Correlation matrix of the clusters, raw correlations below the diagonal, alpha on diagonal, corrected for reliability above the diagonal} \item{sd }{Cluster standard deviations} \item{alpha }{alpha reliabilities of the clusters} \item{G6}{G6* Modified estimated of Guttman Lambda 6} \item{count}{Number of items in the cluster} } \references{ ICLUST: \url{https://personality-project.org/r/r.ICLUST.html} Revelle, W., Wilt, J., and Rosenthal, A. (2010) Individual Differences in Cognition: New Methods for examining the Personality-Cognition Link In Gruszka, A. and Matthews, G. and Szymura, B. (Eds.) Handbook of Individual Differences in Cognition: Attention, Memory and Executive Control, Springer. Revelle, W, Condon, D.M., Wilt, J., French, J.A., Brown, A., and Elleman, L.G. (2016) Web and phone based data collection using planned missing designs. In Fielding, N.G., Lee, R.M. and Blank, G. (Eds). SAGE Handbook of Online Research Methods (2nd Ed), Sage Publcations. } \author{Maintainer: William Revelle \email{revelle@northwestern.edu} } \note{ Although part of ICLUST, this may be used in any SAPA application where we are interested in item- whole correlations of items and composite scales.} \seealso{ \code{\link{ICLUST}}, \code{\link{factor2cluster}}, \code{\link{cluster.cor}} } \examples{ r.mat<- Harman74.cor$cov clusters <- matrix(c(1,1,1,rep(0,24),1,1,1,1,rep(0,17)),ncol=2) cluster.loadings(clusters,r.mat) } \keyword{multivariate }% at least one, from doc/KEYWORDS \keyword{ cluster }% __ONLY ONE__ keyword per line psych/man/factor.scores.Rd0000644000176200001440000001342114356041150015215 0ustar liggesusers\name{factor.scores} \alias{factor.scores} \title{Various ways to estimate factor scores for the factor analysis model} \description{A fundamental problem with factor analysis is that although the model is defined at the structural level, it is indeterminate at the data level. This problem of factor indeterminancy leads to alternative ways of estimating factor scores, none of which is ideal. Following Grice (2001) four different methods are available here. } \usage{ factor.scores(x, f, Phi = NULL, method = c("Thurstone", "tenBerge", "Anderson", "Bartlett", "Harman","components"),rho=NULL,missing=FALSE,impute="none")} \arguments{ \item{x}{Either a matrix of data if scores are to be found, or a correlation matrix if just the factor weights are to be found.} \item{f}{The output from the \code{\link{fa}} or \code{\link{irt.fa}} functions, or a factor loading matrix.} \item{Phi}{If a pattern matrix is provided, then what were the factor intercorrelations. Does not need to be specified if f is the output from the \code{\link{fa}} or \code{\link{irt.fa}} functions.} \item{method}{Which of four factor score estimation procedures should be used. Defaults to "Thurstone" or regression based weights. See details below for the other four methods.} \item{rho}{If x is a set of data and rho is specified, then find scores based upon the fa results and the correlations reported in rho. Used when scoring fa.poly results.} \item{missing}{If missing is TRUE, missing items are imputed using either the median or mean. If missing is FALSE, the default, scores are found based upon the mean of the available items for each subject.} \item{impute}{By default, only complete cases are scored. But, missing data can be imputed using "median" or "mean". The number of missing by subject is reported. If impute = "none", missing data are not scored. } } \details{Although the factor analysis model is defined at the structural level, it is undefined at the data level. This is a well known but little discussed problem with factor analysis. Factor scores represent estimates of common part of the variables and should not be thought of as identical to the factors themselves. If a factor is thought of as a chop stick stuck into the center of an ice cream cone and factor score estimates are represented by straws anywhere along the edge of the cone the problem of factor indeterminacy becomes clear, for depending on the shape of the cone, two straws can be negatively correlated with each other. (The imagery is taken from Niels Waller, adapted from Stanley Mulaik). In a very clear discussion of the problem of factor score indeterminacy, Grice (2001) reviews several alternative ways of estimating factor scores and considers weighting schemes that will produce uncorrelated factor score estimates as well as the effect of using coarse coded (unit weighted) factor weights. \code{\link{factor.scores}} uses four different ways of estimate factor scores. In all cases, the factor score estimates are based upon the data matrix, X, times a weighting matrix, W, which weights the observed variables. For polytomous or dichotmous data, factor scores can be estimated using Item Response Theory techniques (e.g., using \code{link{irt.fa}} and then \code{link{scoreIrt}}. Such scores are still just factor score estimates, for the IRT model is a latent variable model equivalent to factor analysis. \itemize{ \item method="Thurstone" finds the regression based weights: \eqn{W = R^{-1} F} where R is the correlation matrix and F is the factor loading matrix. \item method="tenBerge" finds weights such that the correlation between factors for an oblique solution is preserved. Note that formula 8 in Grice has a typo in the formula for C and should be: \eqn{L = F \Phi^(1/2) } \eqn{C = R^(-1/2) L (L' R^(-1) L)^(-1/2) } \eqn{W = R ^(-1/2) C \Phi^(1/2) } \item method="Anderson" finds weights such that the factor scores will be uncorrelated: \eqn{W = U^{-2}F (F' U^{-2} R U^{-2} F)^{-1/2}} where U is the diagonal matrix of uniquenesses. The Anderson method works for orthogonal factors only, while the tenBerge method works for orthogonal or oblique solutions. \item method = "Bartlett" finds weights given \eqn{W = U^{-2}F (F' U^{-2}F)^{-1}} \item method="Harman" finds weights based upon socalled "idealized" variables: \eqn{W = F (t(F) F)^{-1}}. \item method="components" uses weights that are just component loadings. } } \value{ \itemize{ \item scores (the factor scores if the raw data is given) \item weights (the factor weights) \item r.scores (The correlations of the factor score estimates.) \item missing A vector of the number of missing observations per subject } } \references{ Grice, James W.,2001, Computing and evaluating factor scores, Psychological Methods, 6,4, 430-450. (note the typo in equation 8) ten Berge, Jos M.F., Wim P. Krijnen, Tom Wansbeek and Alexander Shapiro (1999) Some new results on correlation-preserving factor scores prediction methods. Linear Algebra and its Applications, 289, 311-318. Revelle, William. (in prep) An introduction to psychometric theory with applications in R. Springer. Working draft available at \url{https://personality-project.org/r/book/} } \author{ William Revelle } \seealso{ \code{\link{fa}}, \code{\link{factor.stats}} } \examples{ f3 <- fa(Thurstone,3 ) f3$weights #just the scoring weights f5 <- fa(psychTools::bfi,5) #this does the factor analyis my.scores <- factor.scores(psychTools::bfi,f5, method="tenBerge") #compare the tenBerge factor score correlation to the factor correlations cor(my.scores$scores,use="pairwise") - f5$Phi #compare to the f5$Phi values #compare the default (regression) score correlations to the factor correlations cor(f5$scores,use="pairwise") - f5$Phi #compare to the f5 solution } \keyword{ multivariate } \keyword{ models} psych/man/Garcia.Rd0000644000176200001440000001047414527003611013635 0ustar liggesusers\name{Garcia} \alias{Garcia} \alias{protest} \alias{GSBE} \docType{data} \title{Data from the sexism (protest) study of Garcia, Schmitt, Branscome, and Ellemers (2010) } \description{ Garcia, Schmitt, Branscombe, and Ellemers (2010) report data for 129 subjects on the effects of perceived sexism on anger and liking of women's reactions to ingroup members who protest discrimination. This data set is also used as the `protest' data set by Hayes (2013 and 2018). It is a useful example of mediation and moderation in regression. It may also be used as an example of plotting interactions. } \usage{data("GSBE")} \format{ A data frame with 129 observations on the following 6 variables. \describe{ \item{\code{protest}}{0 = no protest, 1 = Individual Protest, 2 = Collective Protest} \item{\code{sexism}}{Means of an 8 item Modern Sexism Scale.} \item{\code{anger}}{Anger towards the target of discrimination. ``I feel angry towards Catherine".} \item{\code{liking}}{Mean rating of 6 liking ratings of the target.} \item{\code{respappr}}{Mean of four items of appropriateness of the target's response.} \item{\code{prot2}}{A recoding of protest into two levels (to match Hayes, 2013).} } } \details{ The reaction of women to women who protest discriminatory treatment was examined in an experiment reported by Garcia et al. (2010). 129 women were given a description of sex discrimination in the workplace (a male lawyer was promoted over a clearly more qualified female lawyer). Subjects then read that the target lawyer felt that the decision was unfair. Subjects were then randomly assigned to three conditions: Control (no protest), Individual Protest (``They are treating me unfairly") , or Collective Protest (``The firm is is treating women unfairly"). Participants were then asked how much they liked the target (liking), how angry they were to the target (anger) and to evaluate the appropriateness of the target's response (respappr). Garcia et al (2010) report a number of interactions (moderation effects) as well as moderated-mediation effects. This data set is used as an example in Hayes (2013) for moderated mediation. It is used here to show how to do moderation (interaction terms) in regression (see \code{\link{setCor}}) , how to do moderated mediation (see \code{\link{mediate}}) and how draw interaction graphs (see help). } \source{ The data were downloaded from the webpages of Andrew Hayes (https://www.afhayes.com/public/hayes2018data.zip) supporting the first and second edition of his book. The second edition includes 6 variables, the first, just four. The protest variable in 2018 has three levels, but just two in the 2013 source. The data are used by kind permission of Donna M. Garcia, Michael T. Schmitt, Nyla R. Branscombe, and Naomi Ellemers. } \references{Garcia, Donna M. and Schmitt, Michael T. and Branscombe, Nyla R. and Ellemers, Naomi (2010). Women's reactions to ingroup members who protest discriminatory treatment: The importance of beliefs about inequality and response appropriateness. European Journal of Social Psychology, (40) 733-745. Hayes, Andrew F. (2013) Introduction to mediation, moderation, and conditional process analysis: A regression-based approach. Guilford Press. } \examples{ data(GSBE) #alias to Garcia data set ## Just do regressions with interactions lmCor(respappr ~ prot2 * sexism,std=FALSE,data=Garcia,main="Moderated (mean centered )") lmCor(respappr ~ prot2 * sexism,std=FALSE,data=Garcia,main="Moderated (don't center)", zero=FALSE) #demonstrate interaction plots plot(respappr ~ sexism, pch = 23- protest, bg = c("black","red", "blue")[protest], data=Garcia, main = "Response to sexism varies as type of protest") by(Garcia,Garcia$protest, function(x) abline(lm(respappr ~ sexism, data =x),lty=c("solid","dashed","dotted")[x$protest+1])) text(6.5,3.5,"No protest") text(3,3.9,"Individual") text(3,5.2,"Collective") #compare two models (bootstrapping n.iter set to 50 for speed # 1) mean center the variables prior to taking product terms mod1 <- mediate(respappr ~ prot2 * sexism +(sexism),data=Garcia,n.iter=50 ,main="Moderated mediation (mean centered)") # 2) do not mean center mod2 <- mediate(respappr ~ prot2 * sexism +(sexism),data=Garcia,zero=FALSE, n.iter=50, main="Moderated mediation (not centered") summary(mod1) summary(mod2) } \keyword{datasets} psych/man/fa.random.Rd0000644000176200001440000003534613464054220014322 0ustar liggesusers\name{fa.random} \alias{fa.random} \title{A first approximation to Random Effects Exploratory Factor Analysis} \description{Inspired, in part, by the wprifm function in the profileR package, fa.random removes between subject differences in mean level and then does a normal exploratory factor analysis of the ipsatized data. Functionally, this removes a general factor of the data before factoring. To prevent non-positive definiteness of the residual data matrix, a very small amount of random noise is added to each variable. This is just a call to fa after removing the between subjects effect. Read the help file for \code{\link{fa}} for a detailed explanation of all of the input parameters and the output objects. } \usage{ fa.random(data, nfactors = 1, fix = TRUE, n.obs = NA, n.iter = 1, rotate = "oblimin", scores = "regression", residuals = FALSE, SMC = TRUE, covar = FALSE, missing = FALSE, impute = "median", min.err = 0.001, max.iter = 50, symmetric = TRUE, warnings = TRUE, fm = "minres", alpha = 0.1, p = 0.05, oblique.scores = FALSE, np.obs = NULL, use = "pairwise", cor = "cor", weight = NULL, ...) } \arguments{ \item{data}{A raw data matrix (or data.frame)} \item{nfactors}{ Number of factors to extract, default is 1 } \item{fix}{If TRUE, then a small amount of random error is added to each observed variable to keep the matrix positive semi-definite. If FALSE, then this is not done but because the matrix is non-positive semi-definite it will need to be smoothed when finding the scores and the various statistics.} \item{n.obs}{Number of observations used to find the correlation matrix if using a correlation matrix. Used for finding the goodness of fit statistics. Must be specified if using a correlaton matrix and finding confidence intervals. Ignored.} \item{np.obs}{The pairwise number of observations. Used if using a correlation matrix and asking for a minchi solution.} \item{rotate}{"none", "varimax", "quartimax", "bentlerT", "equamax", "varimin", "geominT" and "bifactor" are orthogonal rotations. "Promax", "promax", "oblimin", "simplimax", "bentlerQ, "geominQ" and "biquartimin" and "cluster" are possible oblique transformations of the solution. The default is to do a oblimin transformation, although versions prior to 2009 defaulted to varimax. SPSS seems to do a Kaiser normalization before doing Promax, this is done here by the call to "promax" which does the normalization before calling Promax in GPArotation.} \item{n.iter}{Number of bootstrap interations to do in fa or fa.poly} \item{residuals}{Should the residual matrix be shown } \item{scores}{the default="regression" finds factor scores using regression. Alternatives for estimating factor scores include simple regression ("Thurstone"), correlaton preserving ("tenBerge") as well as "Anderson" and "Bartlett" using the appropriate algorithms ( \code{\link{factor.scores}}). Although scores="tenBerge" is probably preferred for most solutions, it will lead to problems with some improper correlation matrices. } \item{SMC}{Use squared multiple correlations (SMC=TRUE) or use 1 as initial communality estimate. Try using 1 if imaginary eigen values are reported. If SMC is a vector of length the number of variables, then these values are used as starting values in the case of fm='pa'. } \item{covar}{if covar is TRUE, factor the covariance matrix, otherwise factor the correlation matrix} \item{missing}{if scores are TRUE, and missing=TRUE, then impute missing values using either the median or the mean} \item{impute}{"median" or "mean" values are used to replace missing values} \item{min.err}{Iterate until the change in communalities is less than min.err} \item{max.iter}{Maximum number of iterations for convergence } \item{symmetric}{symmetric=TRUE forces symmetry by just looking at the lower off diagonal values} \item{warnings}{warnings=TRUE => warn if number of factors is too many } \item{fm}{Factoring method fm="minres" will do a minimum residual as will fm="uls". Both of these use a first derivative. fm="ols" differs very slightly from "minres" in that it minimizes the entire residual matrix using an OLS procedure but uses the empirical first derivative. This will be slower. fm="wls" will do a weighted least squares (WLS) solution, fm="gls" does a generalized weighted least squares (GLS), fm="pa" will do the principal factor solution, fm="ml" will do a maximum likelihood factor analysis. fm="minchi" will minimize the sample size weighted chi square when treating pairwise correlations with different number of subjects per pair. fm ="minrank" will do a minimum rank factor analysis. "old.min" will do minimal residual the way it was done prior to April, 2017 (see discussion below).} \item{alpha}{alpha level for the confidence intervals for RMSEA} \item{p}{if doing iterations to find confidence intervals, what probability values should be found for the confidence intervals} \item{oblique.scores}{When factor scores are found, should they be based on the structure matrix (default) or the pattern matrix (oblique.scores=TRUE). } \item{weight}{If not NULL, a vector of length n.obs that contains weights for each observation. The NULL case is equivalent to all cases being weighted 1.} \item{use}{How to treat missing data, use="pairwise" is the default". See cor for other options.} \item{cor}{How to find the correlations: "cor" is Pearson", "cov" is covariance, "tet" is tetrachoric, "poly" is polychoric, "mixed" uses mixed cor for a mixture of tetrachorics, polychorics, Pearsons, biserials, and polyserials, Yuleb is Yulebonett, Yuleq and YuleY are the obvious Yule coefficients as appropriate} \item{...}{additional parameters, specifically, keys may be passed if using the target rotation, or delta if using geominQ, or whether to normalize if using Varimax} } \details{This function is inspired by the wprifm function in the profileR package and the citation there to a paper by Davison, Kim and Close (2009). The basic logic is to extract a means vector from each subject and then to analyze the resulting ipsatized data matrix. This can be seen as removing acquiecence in the case of personality items, or the general factor, in the case of ability items. Factors composed of items that are all keyed the same way (e.g., Neuroticism in the \code{\link[psychTools]{bfi}} data set) will be most affected by this technique. The output is identical to the normal \code{\link{fa}} output with the addition of two objects: subject and within.r. The subject object is just the vector of the mean score for each subject on all the items. within.r is just the correlation of each item with those scores. } \value{ \item{subject}{A vector of the mean score on all items for each subject} \item{within.r}{The correlation of each item with the subject vector} \item{values }{Eigen values of the common factor solution} \item{e.values}{Eigen values of the original matrix} \item{communality}{Communality estimates for each item. These are merely the sum of squared factor loadings for that item.} \item{communalities}{If using minrank factor analysis, these are the communalities reflecting the total amount of common variance. They will exceed the communality (above) which is the model estimated common variance. } \item{rotation}{which rotation was requested?} \item{n.obs}{number of observations specified or found} \item{loadings}{An item by factor (pattern) loading matrix of class ``loadings" Suitable for use in other programs (e.g., GPA rotation or factor2cluster. To show these by sorted order, use \code{\link{print.psych}} with sort=TRUE} \item{complexity}{Hoffman's index of complexity for each item. This is just \eqn{\frac{(\Sigma a_i^2)^2}{\Sigma a_i^4}}{{(\Sigma a_i^2)^2}/{\Sigma a_i^4}} where a_i is the factor loading on the ith factor. From Hofmann (1978), MBR. See also Pettersson and Turkheimer (2010).} \item{Structure}{An item by factor structure matrix of class ``loadings". This is just the loadings (pattern) matrix times the factor intercorrelation matrix.} \item{fit}{How well does the factor model reproduce the correlation matrix. This is just \eqn{\frac{\Sigma r_{ij}^2 - \Sigma r^{*2}_{ij} }{\Sigma r_{ij}^2} }{(sum(r^2ij - sum(r*^2ij))/sum(r^2ij} (See \code{\link{VSS}}, \code{\link{ICLUST}}, and \code{\link{principal}} for this fit statistic.} \item{fit.off}{how well are the off diagonal elements reproduced?} \item{dof}{Degrees of Freedom for this model. This is the number of observed correlations minus the number of independent parameters. Let n=Number of items, nf = number of factors then \cr \eqn{dof = n * (n-1)/2 - n * nf + nf*(nf-1)/2}{dof = n * (n-1)/2 - n * nf + nf*(nf-1)/2}} \item{objective}{Value of the function that is minimized by a maximum likelihood procedures. This is reported for comparison purposes and as a way to estimate chi square goodness of fit. The objective function is \cr \eqn{f = log(trace ((FF'+U2)^{-1} R) - log(|(FF'+U2)^{-1} R|) - n.items}{log(trace ((FF'+U2)^{-1} R) - log(|(FF'+U2)^-1 R|) - n.items}. When using MLE, this function is minimized. When using OLS (minres), although we are not minimizing this function directly, we can still calculate it in order to compare the solution to a MLE fit. } \item{STATISTIC}{If the number of observations is specified or found, this is a chi square based upon the objective function, f (see above). Using the formula from \code{\link{factanal}}(which seems to be Bartlett's test) : \cr \eqn{\chi^2 = (n.obs - 1 - (2 * p + 5)/6 - (2 * factors)/3)) * f }{chi^2 = (n.obs - 1 - (2 * p + 5)/6 - (2 * factors)/3)) * f } } \item{PVAL}{If n.obs > 0, then what is the probability of observing a chisquare this large or larger?} \item{Phi}{If oblique rotations (e.g,m using oblimin from the GPArotation package or promax) are requested, what is the interfactor correlation?} \item{communality.iterations}{The history of the communality estimates (For principal axis only.) Probably only useful for teaching what happens in the process of iterative fitting.} \item{residual}{The matrix of residual correlations after the factor model is applied. To display it conveniently, use the \code{\link{residuals}} command. } \item{chi}{When normal theory fails (e.g., in the case of non-positive definite matrices), it useful to examine the empirically derived \eqn{\chi^2}{chi^2} based upon the sum of the squared residuals * N. This will differ slightly from the MLE estimate which is based upon the fitting function rather than the actual residuals.} \item{rms}{This is the sum of the squared (off diagonal residuals) divided by the degrees of freedom. Comparable to an RMSEA which, because it is based upon \eqn{\chi^2}{chi^2}, requires the number of observations to be specified. The rms is an empirical value while the RMSEA is based upon normal theory and the non-central \eqn{\chi^2}{chi^2} distribution. That is to say, if the residuals are particularly non-normal, the rms value and the associated \eqn{\chi^2}{chi^2} and RMSEA can differ substantially. } \item{crms}{rms adjusted for degrees of freedom} \item{RMSEA}{The Root Mean Square Error of Approximation is based upon the non-central \eqn{\chi^2}{chi^2} distribution and the \eqn{\chi^2}{chi^2} estimate found from the MLE fitting function. With normal theory data, this is fine. But when the residuals are not distributed according to a noncentral \eqn{\chi^2}{chi^2}, this can give very strange values. (And thus the confidence intervals can not be calculated.) The RMSEA is a conventional index of goodness (badness) of fit but it is also useful to examine the actual rms values. } \item{TLI}{The Tucker Lewis Index of factoring reliability which is also known as the non-normed fit index. } \item{BIC}{Based upon \eqn{\chi^2}{chi^2} with the assumption of normal theory and using the \eqn{\chi^2}{chi^2} found using the objective function defined above. This is just \eqn{\chi^2 - 2 df}{chi^2 - 2 df}} \item{eBIC}{When normal theory fails (e.g., in the case of non-positive definite matrices), it useful to examine the empirically derived eBIC based upon the empirical \eqn{\chi^2}{chi^2} - 2 df. } \item{R2}{The multiple R square between the factors and factor score estimates, if they were to be found. (From Grice, 2001). Derived from R2 is is the minimum correlation between any two factor estimates = 2R2-1. } \item{r.scores}{The correlations of the factor score estimates using the specified model, if they were to be found. Comparing these correlations with that of the scores themselves will show, if an alternative estimate of factor scores is used (e.g., the tenBerge method), the problem of factor indeterminacy. For these correlations will not necessarily be the same. } \item{weights}{The beta weights to find the factor score estimates. These are also used by the \code{\link{predict.psych}} function to find predicted factor scores for new cases. These weights will depend upon the scoring method requested. } \item{scores}{The factor scores as requested. Note that these scores reflect the choice of the way scores should be estimated (see scores in the input). That is, simple regression ("Thurstone"), correlaton preserving ("tenBerge") as well as "Anderson" and "Bartlett" using the appropriate algorithms (see \code{\link{factor.scores}}). The correlation between factor score estimates (r.scores) is based upon using the regression/Thurstone approach. The actual correlation between scores will reflect the rotation algorithm chosen and may be found by correlating those scores. Although the scores are found by multiplying the standarized data by the weights matrix, this will not result in standard scores if using regression. } \item{valid}{The validity coffiecient of course coded (unit weighted) factor score estimates (From Grice, 2001)} \item{score.cor}{The correlation matrix of course coded (unit weighted) factor score estimates, if they were to be found, based upon the loadings matrix rather than the weights matrix. } \item{rot.mat}{The rotation matrix as returned from GPArotation.} } \references{ Davison, Mark L. and Kim, Se-Kang and Close, Catherine (2009) Factor Analytic Modeling of Within Person Variation in Score Profiles. Multivariate Behavioral Research (44(5) 668-687. } \author{ William Revelle } \note{An interesting, but not necessarily good, idea. To see what this does if there is a general factor, consider the unrotated solutions to the ability data set. In particular, compare the first factor loading with its congruence to the ipsatized solution means vector correlated with the items (the within.r object). } \seealso{ \code{\link{fa}} } \examples{ fa.ab <- fa(psychTools::ability,4,rotate="none") #normal factor analysis fa.ab.ip <- fa.random(psychTools::ability,3,rotate="none") fa.congruence(list(fa.ab,fa.ab.ip,fa.ab.ip$within.r)) } \keyword{ multivariate } \keyword{ models}psych/man/factor.model.Rd0000644000176200001440000000261013256544634015032 0ustar liggesusers\name{factor.model} \alias{factor.model} \title{ Find R = F F' + U2 is the basic factor model } \description{The basic factor or principal components model is that a correlation or covariance matrix may be reproduced by the product of a factor loading matrix times its transpose. Find this reproduced matrix. Used by \code{\link{factor.fit}}, \code{\link{VSS}}, \code{\link{ICLUST}}, etc. } \usage{ factor.model(f,Phi=NULL,U2=TRUE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{f}{ A matrix of loadings. } \item{Phi}{A matrix of factor correlations} \item{U2}{Should the diagonal be model by ff' (U2 = TRUE) or replaced with 1's (U2 = FALSE)} } \value{ A correlation or covariance matrix. } \references{Gorsuch, Richard, (1983) Factor Analysis. Lawrence Erlebaum Associates. \cr Revelle, W. In preparation) An Introduction to Psychometric Theory with applications in R (\url{https://personality-project.org/r/book/}) } \author{ \email{revelle@northwestern.edu } \cr \url{https://personality-project.org/revelle.html} \cr } \seealso{ \code{\link{ICLUST.graph}},\code{\link{ICLUST.cluster}}, \code{\link{cluster.fit} }, \code{\link{VSS}}, \code{\link{omega} }} \examples{ f2 <- matrix(c(.9,.8,.7,rep(0,6),.6,.7,.8),ncol=2) mod <- factor.model(f2) round(mod,2) } \keyword{ multivariate }% at least one, from doc/KEYWORDS \keyword{ models }% __ONLY ONE__ keyword per line psych/man/fa.sort.Rd0000644000176200001440000000440713403614177014031 0ustar liggesusers\name{fa.sort} \Rdversion{1.1} \alias{fa.sort} \alias{fa.organize} \title{Sort factor analysis or principal components analysis loadings} \description{Although the print.psych function will sort factor analysis loadings, sometimes it is useful to do this outside of the print function. fa.sort takes the output from the fa or principal functions and sorts the loadings for each factor. Items are located in terms of their greatest loading. The new order is returned as an element in the fa list. fa.organize allows for the columns or rows to be reorganized. } \usage{ fa.sort(fa.results,polar=FALSE) fa.organize(fa.results,o=NULL,i=NULL,cn=NULL,echelon=TRUE,flip=TRUE) } \arguments{ \item{fa.results}{The output from a factor analysis or principal components analysis using \code{\link{fa}} or \code{\link{principal}}. Can also just be a matrix of loadings. } \item{polar}{Sort by polar coordinates of first two factors (FALSE)} \item{o}{The order in which to order the factors} \item{i}{The order in which to order the items} \item{cn}{new factor names} \item{echelon}{Organize the factors so that they are in echelon form (variable 1 .. n on factor 1, n+1 ...n=k on factor 2, etc.) } \item{flip}{Flip factor loadings such that the colMean is positive.} } \details{ The fa.results$loadings are replaced with sorted loadings. fa.organize takes a factor analysis or components output and reorganizes the factors in the o order. Items are organized in the i order. This is useful when comparing alternative factor solutions. The flip option works only for the case of matrix input, not for full \code{\link{fa}} objects. Use the \code{\link{reflect}} function. } \value{ A sorted factor analysis, principal components analysis, or omega loadings matrix. These sorted values are used internally by the various diagram functions. The values returned are the same as \code{\link{fa}}, except in sorted order. In addition, the order is returned as an additional element in the fa list. } \author{William Revelle } \seealso{ See Also as \code{\link{fa}},\code{\link{print.psych}}, \code{\link{fa.diagram}}, } \examples{ test.simple <- fa(sim.item(16),2) fa.sort(test.simple) fa.organize(test.simple,c(2,1)) #the factors but not the items have been rearranged } \keyword{ multivariate } psych/man/sim.irt.Rd0000644000176200001440000003502214041632264014033 0ustar liggesusers\name{sim.irt} \alias{sim.irt} \alias{sim.rasch} \alias{sim.npl} \alias{sim.npn} \alias{sim.poly} \alias{sim.poly.npl} \alias{sim.poly.npn} \alias{sim.poly.ideal} \alias{sim.poly.ideal.npl} \alias{sim.poly.ideal.npn} \alias{sim.poly.mat} \title{Functions to simulate psychological/psychometric data.} \description{A number of functions in the psych package will generate simulated data with particular structures. These functions include \code{\link{sim}} for a factor simplex, and \code{\link{sim.simplex}} for a data simplex, \code{\link{sim.circ}} for a circumplex structure, \code{\link{sim.congeneric}} for a one factor factor congeneric model, \code{\link{sim.dichot}} to simulate dichotomous items, \code{\link{sim.hierarchical}} to create a hierarchical factor model, \code{\link{sim.item}} a more general item simulation, \code{\link{sim.minor}} to simulate major and minor factors, \code{\link{sim.omega}} to test various examples of omega, \code{\link{sim.parallel}} to compare the efficiency of various ways of deterimining the number of factors, \code{\link{sim.rasch}} to create simulated rasch data, \code{\link{sim.irt}} to create general 1 to 4 parameter IRT data by calling \code{\link{sim.npl}} 1 to 4 parameter logistic IRT or \code{\link{sim.npn}} 1 to 4 paramater normal IRT, \code{\link{sim.poly}} to create polytomous ideas by calling \code{\link{sim.poly.npn}} 1-4 parameter polytomous normal theory items or \code{\link{sim.poly.npl}} 1-4 parameter polytomous logistic items, and \code{\link{sim.poly.ideal}} which creates data following an ideal point or unfolding model by calling \code{\link{sim.poly.ideal.npn}} 1-4 parameter polytomous normal theory ideal point model or \code{\link{sim.poly.ideal.npl}} 1-4 parameter polytomous logistic ideal point model. \code{\link{sim.structural}} a general simulation of structural models, and \code{\link{sim.anova}} for ANOVA and lm simulations, and \code{\link{sim.VSS}}. Some of these functions are separately documented and are listed here for ease of the help function. See each function for more detailed help. } \usage{ sim.rasch(nvar = 5,n = 500, low=-3,high=3,d=NULL, a=1,mu=0,sd=1) sim.irt(nvar = 5, n = 500, low=-3, high=3, a=NULL,c=0, z=1,d=NULL, mu=0,sd=1, mod="logistic",theta=NULL) sim.npl(nvar = 5, n = 500, low=-3,high=3,a=NULL,c=0,z=1,d=NULL,mu=0,sd=1,theta=NULL) sim.npn(nvar = 5, n = 500, low=-3,high=3,a=NULL,c=0,z=1,d=NULL,mu=0,sd=1,theta=NULL) sim.poly(nvar = 5 ,n = 500,low=-2,high=2,a=NULL,c=0,z=1,d=NULL, mu=0,sd=1,cat=5,mod="logistic",theta=NULL) sim.poly.npn(nvar = 5 ,n = 500,low=-2,high=2,a=NULL,c=0,z=1,d=NULL, mu=0, sd=1, cat=5,theta=NULL) sim.poly.npl(nvar = 5 ,n = 500,low=-2,high=2,a=NULL,c=0,z=1,d=NULL, mu=0, sd=1, cat=5,theta=NULL) sim.poly.ideal(nvar = 5 ,n = 500,low=-2,high=2,a=NULL,c=0,z=1,d=NULL, mu=0,sd=1,cat=5,mod="logistic") sim.poly.ideal.npn(nvar = 5,n = 500,low=-2,high=2,a=NULL,c=0,z=1,d=NULL, mu=0,sd=1,cat=5) sim.poly.ideal.npl(nvar = 5,n = 500,low=-2,high=2,a=NULL,c=0,z=1,d=NULL, mu=0,sd=1,cat=5,theta=NULL) sim.poly.mat(R,m,n) } \arguments{ \item{n}{Number of cases to simulate} \item{mu}{The means for the items (if not 0)} \item{nvar}{Number of variables for a simplex structure} \item{low}{lower difficulty for sim.rasch or sim.irt} \item{high}{higher difficulty for sim.rasch or sim.irt} \item{a}{if not specified as a vector, the descrimination parameter a = \eqn{\alpha} will be set to 1.0 for all items} \item{d}{ if not specified as a vector, item difficulties (d = \eqn{\delta}) will range from low to high} \item{c}{the gamma parameter: if not specified as a vector, the guessing asymptote is set to 0} \item{z}{the zeta parameter: if not specified as a vector, set to 1} \item{sd}{the standard deviation for the underlying latent variable in the irt simulations} \item{mod}{which IRT model to use, mod="logistic" simulates a logistic function, otherwise, a normal function} \item{cat}{Number of categories to simulate in sim.poly. If cat=2, then this is the same as simulating t/f items and sim.poly is functionally equivalent to sim.irt} \item{theta}{The underlying latent trait value for each simulated subject} \item{R}{A correlation matrix to be simulated using the sim.poly.mat function} \item{m}{The matrix of marginals for all the items} } \details{Simulation of data structures is a very useful tool in psychometric research and teaching. By knowing ``truth" it is possible to see how well various algorithms can capture it. For a much longer discussion of the use of simulation in psychometrics, see the accompany vignettes. The simulations documented here are a miscellaneous set of functions that will be documented in other help files eventually. The default values for \code{\link{sim.structure}} is to generate a 4 factor, 12 variable data set with a simplex structure between the factors. This, and the simplex of items (\code{\link{sim.simplex}}) can also be converted in a STARS model with an autoregressive component (alpha) and a stable trait component (lambda). Two data structures that are particular challenges to exploratory factor analysis are the simplex structure and the presence of minor factors. Simplex structures \code{\link{sim.simplex}} will typically occur in developmental or learning contexts and have a correlation structure of r between adjacent variables and r^n for variables n apart. Although just one latent variable (r) needs to be estimated, the structure will have nvar-1 factors. An alternative version of the simplex is the State-Trait-Auto Regressive Structure (STARS) which has both a simplex state structure, with autoregressive path alpha and a trait structure with path lambda. This simulated in \code{\link{sim.simplex}} by specifying a non-zero lambda value. Many simulations of factor structures assume that except for the major factors, all residuals are normally distributed around 0. An alternative, and perhaps more realistic situation, is that the there are a few major (big) factors and many minor (small) factors. The challenge is thus to identify the major factors. \code{\link{sim.minor}} generates such structures. The structures generated can be thought of as havinga a major factor structure with some small correlated residuals. To make these simulations complete, the possibility of a general factor is considered. For simplicity, sim.minor allows one to specify a set of loadings to be sampled from for g, fmajor and fminor. Alternatively, it is possible to specify the complete factor matrix. Another structure worth considering is direct modeling of a general factor with several group factors. This is done using \code{\link{sim.general}}. Although coefficient \eqn{\omega}{\omega} is a very useful indicator of the general factor saturation of a unifactorial test (one with perhaps several sub factors), it has problems with the case of multiple, independent factors. In this situation, one of the factors is labelled as ``general'' and the omega estimate is too large. This situation may be explored using the \code{\link{sim.omega}} function with general left as NULL. If there is a general factor, then results from \code{\link{sim.omega}} suggests that omega estimated either from EFA or from SEM does a pretty good job of identifying it but that the EFA approach using Schmid-Leiman transformation is somewhat more robust than the SEM approach. The four irt simulations, sim.rasch, sim.irt, sim.npl and sim.npn, simulate dichotomous items following the Item Response model. sim.irt just calls either sim.npl (for logistic models) or sim.npn (for normal models) depending upon the specification of the model. The logistic model is \deqn{P(i,j) = \gamma + \frac{\zeta-\gamma}{1+ e^{\alpha(\delta-\theta)}}}{P(i,j) = \gamma + (\zeta-\gamma)/(1+ exp(\alpha(\delta-\theta)))} where \eqn{\gamma} is the lower asymptote or guesssing parameter, \eqn{\zeta} is the upper asymptote (normally 1), \eqn{\alpha} is item discrimination and \eqn{\delta} is item difficulty. For the 1 Paramater Logistic (Rasch) model, gamma=0, zeta=1, alpha=1 and item difficulty is the only free parameter to specify. For the 2PL and 2PN models, a = \eqn{\alpha} and d = \eqn{\delta} are specified. \cr For the 3PL or 3PN models, items also differ in their guessing parameter c =\eqn{\gamma}. \cr For the 4PL and 4PN models, the upper asymptote, z= \eqn{\zeta} is also specified. \cr (Graphics of these may be seen in the demonstrations for the \code{\link{logistic}} function.) The normal model (irt.npn calculates the probability using pnorm instead of the logistic function used in irt.npl, but the meaning of the parameters are otherwise the same. With the a = \eqn{\alpha} parameter = 1.702 in the logistic model the two models are practically identical. In parallel to the dichotomous IRT simulations are the poly versions which simulate polytomous item models. They have the additional parameter of how many categories to simulate. In addition, the \code{\link{sim.poly.ideal}} functions will simulate an ideal point or unfolding model in which the response probability varies by the distance from each subject's ideal point. Some have claimed that this is a more appropriate model of the responses to personality questionnaires. It will lead to simplex like structures which may be fit by a two factor model. The middle items form one factor, the extreme a bipolar factor. By default, the theta parameter is created in each function as normally distributed with mean mu=0 and sd=1. In the case where you want to specify the theta to be equivalent from another simulation or fixed for a particular experimental condition, either take the theta object from the output of a previous simulation, or create it using whatever properties are desired. The previous functions all assume one latent trait. Alternatively, we can simulate dichotomous or polytomous items with a particular structure using the sim.poly.mat function. This takes as input the population correlation matrix, the population marginals, and the sample size. It returns categorical items with the specified structure. Other simulation functions in psych are: \code{\link{sim.structure}} A function to combine a measurement and structural model into one data matrix. Useful for understanding structural equation models. Combined with \code{\link{structure.diagram}} to see the proposed structure. \code{\link{sim.congeneric}} A function to create congeneric items/tests for demonstrating classical test theory. This is just a special case of sim.structure. \code{\link{sim.hierarchical}} A function to create data with a hierarchical (bifactor) structure. \code{\link{sim.item}} A function to create items that either have a simple structure or a circumplex structure. \code{\link{sim.circ}} Create data with a circumplex structure. \code{\link{sim.dichot}} Create dichotomous item data with a simple or circumplex structure. \code{\link{sim.minor}} Create a factor structure for nvar variables defined by nfact major factors and nvar/2 ``minor" factors for n observations. Although the standard factor model assumes that K major factors (K << nvar) will account for the correlations among the variables \deqn{R = FF' + U^2} where R is of rank P and F is a P x K matrix of factor coefficients and U is a diagonal matrix of uniquenesses. However, in many cases, particularly when working with items, there are many small factors (sometimes referred to as correlated residuals) that need to be considered as well. This leads to a data structure such that \deqn{R = FF' + MM' + U^2} where R is a P x P matrix of correlations, F is a P x K factor loading matrix, M is a P x P/2 matrix of minor factor loadings, and U is a diagonal matrix (P x P) of uniquenesses. Such a correlation matrix will have a poor \eqn{\chi^2} value in terms of goodness of fit if just the K factors are extracted, even though for all intents and purposes, it is well fit. sim.minor will generate such data sets with big factors with loadings of .6 to .8 and small factors with loadings of -.2 to .2. These may both be adjusted. \code{\link{sim.parallel}} Create a number of simulated data sets using sim.minor to show how parallel analysis works. The general observation is that with the presence of minor factors, parallel analysis is probably best done with component eigen values rather than factor eigen values, even when using the factor model. \code{\link{sim.anova}} Simulate a 3 way balanced ANOVA or linear model, with or without repeated measures. Useful for teaching research methods and generating teaching examples. \code{\link{sim.multilevel}} To understand some of the basic concepts of multilevel modeling, it is useful to create multilevel structures. The correlations of aggregated data is sometimes called an 'ecological correlation'. That group level and individual level correlations are independent makes such inferences problematic. This simulation allows for demonstrations that correlations within groups do not imply, nor are implied by, correlations between group means. } \references{Revelle, W. (in preparation) An Introduction to Psychometric Theory with applications in R. Springer. at \url{https://personality-project.org/r/book/} } \author{William Revelle} \seealso{ See above} \examples{ simplex <- sim.simplex() #create the default simplex structure lowerMat(simplex) #the correlation matrix #create a congeneric matrix congeneric <- sim.congeneric() lowerMat(congeneric) R <- sim.hierarchical() lowerMat(R) #now simulate categorical items with the hierarchical factor structure. #Let the items be dichotomous with varying item difficulties. marginals = matrix(c(seq(.1,.9,.1),seq(.9,.1,-.1)),byrow=TRUE,nrow=2) X <- sim.poly.mat(R=R,m=marginals,n=1000) lowerCor(X) #show the raw correlations #lowerMat(tetrachoric(X)$rho) # show the tetrachoric correlations (not run) #generate a structure fx <- matrix(c(.9,.8,.7,rep(0,6),c(.8,.7,.6)),ncol=2) fy <- c(.6,.5,.4) Phi <- matrix(c(1,0,.5,0,1,.4,0,0,0),ncol=3) R <- sim.structure(fx,Phi,fy) cor.plot(R$model) #show it graphically simp <- sim.simplex() #show the simplex structure using cor.plot cor.plot(simp,colors=TRUE,main="A simplex structure") #Show a STARS model simp <- sim.simplex(alpha=.8,lambda=.4) #show the simplex structure using cor.plot cor.plot(simp,colors=TRUE,main="State Trait Auto Regressive Simplex" ) dichot.sim <- sim.irt() #simulate 5 dichotomous items poly.sim <- sim.poly(theta=dichot.sim$theta) #simulate 5 polytomous items that correlate #with the dichotomous items } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{multivariate} \keyword{datagen} psych/man/irt.responses.Rd0000644000176200001440000000625013463370040015263 0ustar liggesusers\name{irt.responses} \alias{irt.responses} \title{Plot probability of multiple choice responses as a function of a latent trait } \description{When analyzing ability tests, it is important to consider how the distractor alternatives vary as a function of the latent trait. The simple graphical solution is to plot response endorsement frequencies against the values of the latent trait found from multiple items. A good item is one in which the probability of the distractors decrease and the keyed answer increases as the latent trait increases. } \usage{ irt.responses(theta,items, breaks = 11,show.missing=FALSE, show.legend=TRUE, legend.location="topleft", colors=NULL,...) } \arguments{ \item{theta}{The estimated latent trait (found, for example by using \code{\link{score.irt}}). } \item{items}{ A matrix or data frame of the multiple choice item responses.} \item{breaks}{The number of levels of the theta to use to form the probability estimates. May be increased if there are enough cases. } \item{show.legend}{Show the legend} \item{show.missing}{For some SAPA data sets, there are a very large number of missing responses. In general, we do not want to show their frequency.} \item{legend.location}{Choose among c("bottomright", "bottom", "bottomleft", "left", "topleft", "top", "topright", "right", "center","none"). The default is "topleft".} \item{colors}{if NULL, then use the default colors, otherwise, specify the color choices. The basic color palette is c("black", "blue", "red", "darkgreen", "gold2", "gray50", "cornflowerblue", "mediumorchid2").} \item{...}{Other parameters for plots and points} } \details{This function is a convenient way to analyze the quality of item alternatives in a multiple choice ability test. The typical use is to first score the test (using, e.g., \code{\link{score.multiple.choice}} according to some scoring key and to then find the \code{\link{score.irt}} based scores. Response frequencies for each alternative are then plotted against total score. An ideal item is one in which just one alternative (the correct one) has a monotonically increasing response probability. Because of the similar pattern of results for IRT based or simple sum based item scoring, the function can be run on scores calculated either by \code{\link{score.irt}} or by \code{\link{score.multiple.choice}}. In the latter case, the number of breaks should not exceed the number of possible score alternatives. } \value{Graphic output} \references{ Revelle, W. An introduction to psychometric theory with applications in R (in prep) Springer. Draft chapters available at \url{https://personality-project.org/r/book/} } \author{William Revelle } \seealso{ \code{\link{score.multiple.choice}}, \code{\link{score.irt}} } \examples{ data(psychTools::iqitems) iq.keys <- c(4,4,4, 6,6,3,4,4, 5,2,2,4, 3,2,6,7) scores <- score.multiple.choice(iq.keys,psychTools::iqitems,score=TRUE,short=FALSE) #note that for speed we can just do this on simple item counts rather # than IRT based scores. op <- par(mfrow=c(2,2)) #set this to see the output for multiple items irt.responses(scores$scores,psychTools::iqitems[1:4],breaks=11) op <- par(op) } \keyword{multivariate } \keyword{models}psych/man/error.bars.Rd0000644000176200001440000002070214444326423014530 0ustar liggesusers\name{error.bars} \alias{error.bars} \alias{error.bars.tab} \title{Plot means and confidence intervals} \description{One of the many functions in R to plot means and confidence intervals. Can be done using barplots if desired. Can also be combined with such functions as boxplot or violin to summarize distributions. Means and standard errors are calculated from the raw data using \code{\link{describe}}. Alternatively, plots of means +/- one standard deviation may be drawn. } \usage{ error.bars(x,stats=NULL,data=NULL,group=NULL, ylab = "Dependent Variable", xlab="Independent Variable", main=NULL,eyes=TRUE, ylim = NULL, xlim=NULL, alpha=.05, sd=FALSE, labels = NULL, pos = NULL, arrow.len = 0.05, arrow.col="black",add = FALSE,bars=FALSE, within=FALSE, col=c("black","blue","red"), density=-10,...) error.bars.tab(t,way="columns",raw=FALSE,col=c('blue','red'),...) } \arguments{ \item{x}{ A data frame or matrix of raw data OR, a formula of the form DV ~ IV. If formula input is specified, error.bars.by is called. } \item{t}{A table of frequencies} \item{stats}{Alternatively, a data.frame of descriptive stats from (e.g., describe). if specified, the means, sd, n and perhaps of se of a data set to be plotted} \item{data}{If using formula input, specify the object where the data may found} \item{group}{If not null, then do error.bars.by syntax} \item{ylab}{y label} \item{xlab}{x label} \item{main}{title for figure} \item{ylim}{if specified, the limits for the plot, otherwise based upon the data} \item{xlim}{if specified, the x limits for the plot, otherwise c(.5,nvar + .5)} \item{eyes}{should 'cats eyes' plots be drawn} \item{alpha}{alpha level of confidence interval -- defaults to 95\% confidence interval} \item{sd}{if TRUE, draw one standard deviation instead of standard errors at the alpha level} \item{labels}{ X axis label } \item{pos}{where to place text: below, left, above, right} \item{arrow.len}{ How long should the top of the error bars be?} \item{arrow.col}{What color should the error bars be?} \item{add}{ add=FALSE, new plot, add=TRUE, just points and error bars} \item{bars}{bars=TRUE will draw a bar graph if you really want to do that} \item{within}{should the error variance of a variable be corrected by 1-SMC?} \item{col}{color(s) of the catseyes. Defaults to blue.} \item{density}{If negative, solid colors, if positive, how many lines to draw} \item{way}{Percentages are based upon the row totals (default) column totals, or grand total of the data Table} \item{raw}{If raw is FALSE, display the graphs in terms of probability, raw TRUE displays the data in terms of raw counts} \item{\dots}{other parameters to pass to the plot function, e.g., typ="b" to draw lines, lty="dashed" to draw dashed lines} } \details{Drawing the mean +/- a confidence interval is a frequently used function when reporting experimental results. By default, the confidence interval is 1.96 standard errors of the t-distribution. If within=TRUE, the error bars are corrected for the correlation with the other variables by reducing the variance by a factor of (1-smc). This allows for comparisons between variables. The error bars are normally calculated from the data using the \code{\link{describe}} function. If, alternatively, a matrix of statistics is provided with column headings of values, means, and se, then those values will be used for the plot (using the stats option). If n is included in the matrix of statistics, then the distribution is drawn for a t distribution for n-1 df. If n is omitted (NULL) or is NA, then the distribution will be a normal distribution. If sd is TRUE, then the error bars will represent one standard deviation from the mean rather than be a function of alpha and the standard errors. See the last two examples for the case of plotting data with statistics from another function. Alternatively, \code{\link{error.bars.tab}} will take tabulated data and convert to either row, column or overall percentages, and then plot these as percentages with the equivalent standard error (based upon sqrt(pq/N)). In August, 2018, the functionality of \code{\link{error.bars}} and \code{\link{error.bars.by}} were combined so that if groups are specified, then the error bars are done by group. Furthermore, if the x variable is a formula of the form DV ~ IV, then \code{\link{error.bars.by}} is called to do the plotting. } \value{Graphic output showing the means + x% confidence intervals. For ci=1.96 and normal data, this will be the 95\% confidence region. For ci=1, the 68\% confidence region. These confidence regions are based upon normal theory and do not take into account any skew in the variables. More accurate confidence intervals could be found by resampling. The error.bars.tab function will return (invisibly) the cell means and standard errors. } \author{William Revelle} \seealso{ \code{\link{error.crosses}} for two way error bars, \code{\link{error.bars.by}} for error bars for different groups as well as \code{\link{error.dots}}. The \code{\link{error.bars.by}} is useful for showing the results of one or two way ANOVAs in that it will display means and CIs for one or more DVs for one or two IVs. In addition, as pointed out by Jim Lemon on the R-help news group, error bars or confidence intervals may be drawn using \tabular{ll}{ function \tab package \cr bar.err \tab (agricolae) \cr plotCI \tab (gplots) \cr xYplot \tab(Hmisc) \cr dispersion \tab(plotrix) \cr plotCI \tab(plotrix) \cr } For advice why not to draw bar graphs with error bars, see the page at biostat.mc.vanderbilt.edu/wiki/Main/DynamitePlots. } \examples{ set.seed(42) x <- matrix(rnorm(1000),ncol=20) boxplot(x,notch=TRUE,main="Notched boxplot with error bars") error.bars(x,add=TRUE) abline(h=0) #show 50% confidence regions and color each variable separately error.bars(attitude,alpha=.5, main="50 percent confidence limits",col=rainbow(ncol(attitude)) ) error.bars(attitude,bar=TRUE) #show the use of bar graphs #combine with a strip chart and boxplot stripchart(attitude,vertical=TRUE,method="jitter",jitter=.1,pch=19, main="Stripchart with 95 percent confidence limits") boxplot(attitude,add=TRUE) error.bars(attitude,add=TRUE,arrow.len=.2) #use statistics from somewhere else #by specifying n, we are using the t distribution for confidences #The first example allows the variables to be spaced along the x axis my.stats <- data.frame(values=c(1,2,8),mean=c(10,12,18),se=c(2,3,5),n=c(5,10,20)) error.bars(stats=my.stats,type="b",main="data with confidence intervals") #don't connect the groups my.stats <- data.frame(values=c(1,2,8),mean=c(10,12,18),se=c(2,3,5),n=c(5,10,20)) error.bars(stats=my.stats,main="data with confidence intervals") #by not specifying value, the groups are equally spaced my.stats <- data.frame(mean=c(10,12,18),se=c(2,3,5),n=c(5,10,20)) rownames(my.stats) <- c("First", "Second","Third") error.bars(stats=my.stats,xlab="Condition",ylab="Score") #Consider the case where we get stats from describe temp <- describe(attitude) error.bars(stats=temp) #show these do not differ from the other way by overlaying the two error.bars(attitude,add=TRUE,col="red") #n is omitted #the error distribution is a normal distribution my.stats <- data.frame(mean=c(2,4,8),se=c(2,1,2)) rownames(my.stats) <- c("First", "Second","Third") error.bars(stats=my.stats,xlab="Condition",ylab="Score") #n is specified #compare this with small n which shows larger confidence regions my.stats <- data.frame(mean=c(2,4,8),se=c(2,1,2),n=c(10,10,3)) rownames(my.stats) <- c("First", "Second","Third") error.bars(stats=my.stats,xlab="Condition",ylab="Score") #example of arrest rates (as percentage of condition) arrest <- data.frame(Control=c(14,21),Treated =c(3,23)) rownames(arrest) <- c("Arrested","Not Arrested") error.bars.tab(arrest,ylab="Probability of Arrest",xlab="Control vs Treatment", main="Probability of Arrest varies by treatment") #Show the raw rates error.bars.tab(arrest,raw=TRUE,ylab="Number Arrested",xlab="Control vs Treatment", main="Count of Arrest varies by treatment") #If a grouping variable is specified, the function calls error.bars.by #Use error.bars.by to have more control over the output. #Show how to use grouping variables error.bars(SATV + SATQ ~ gender, data=sat.act) #one grouping variable, formula input error.bars(SATV + SATQ ~ education + gender,data=sat.act)#two grouping variables } \keyword{ multivariate }% at least one, from doc/KEYWORDS \keyword{ hplot }% __ONLY ONE__ keyword per line psych/man/test.psych.Rd0000755000176200001440000000636713463367374014605 0ustar liggesusers\name{test.psych} \alias{test.psych} \title{ Testing of functions in the psych package } \description{Test to make sure the psych functions run on basic test data sets } \usage{ test.psych(first=1,last=5,short=TRUE,all=FALSE,fapc=FALSE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{first}{first=1: start with dataset first} \item{last}{last=5: test for datasets until last} \item{short}{short=TRUE - don't return any analyses} \item{all}{To get around a failure on certain Solaris 32 bit systems, all=FALSE is the default} \item{fapc}{if fapc=TRUE, then do a whole series of tests of factor and principal component extraction and rotations.} } \details{When modifying the psych package, it is useful to make sure that adding some code does not break something else. The test.psych function tests the major functions on various standard data sets. It also shows off a number of the capabilities of the psych package. Uses 5 standard data sets: \cr USArrests Violent Crime Rates by US State (4 variables) \cr attitude The Chatterjee-Price Attitude Data \cr Harman23.cor$cov Harman Example 2.3 8 physical measurements \cr Harman74.cor$cov Harman Example 7.4 24 mental measurements \cr ability.cov$cov 8 Ability and Intelligence Tests \cr It also uses the bfi and ability data sets from psych } \value{ \item{out }{if short=FALSE, then list of the output from all functions tested} } \author{ William Revelle} \note{ Although test.psych may be used as a quick demo of the various functions in the psych packge, in general, it is better to try the specific functions themselves. The main purpose of test.psych is to make sure functions throw error messages or correct for weird conditions. The datasets tested are part of the standard R data sets and represent some of the basic problems encountered. When version 1.1.10 was released, it caused errors when compiling and testing on some Solaris 32 bit systems. The all option was added to avoid this problem (since I can't replicate the problem on Macs or PCs). all=TRUE adds one more test, for a non-positive definite matrix.} \section{Warning }{Warning messages will be thrown by fa.parallel and sometimes by fa for random datasets.} \examples{ #test <- test.psych() #not run #test.psych(all=TRUE) # f3 <- fa(bfi[1:15],3,n.iter=5) # f3 <- fa(bfi[1:15],3,n.iter=5,rotate="Varimax") # f3 <- fa(bfi[1:15],3,n.iter=5,rotate="varimax") # f3 <- fa(bfi[1:15],3,n.iter=5,rotate="bifactor") # f3 <- fa(bfi[1:15],3,n.iter=5,rotate="varimin") # f3 <- fa(bfi[1:15],3,n.iter=5,rotate="bentlerT") # f3 <- fa(bfi[1:15],3,n.iter=5,rotate="geominT") # f3 <- fa(bfi[1:15],3,n.iter=5,rotate="equamax") # f3 <- fa(bfi[1:15],3,n.iter=5,rotate="Promax") # f3 <- fa(bfi[1:15],3,n.iter=5,rotate="cluster") # f3 <- fa(bfi[1:15],3,n.iter=5,rotate="biquartimin") # f3 <- fa(bfi[1:15],3,n.iter=5,rotate="equamax") # f3 <- fa(bfi[1:15],3,n.iter=5,rotate="Promax") # # fpoly <- fa(bfi[1:10],2,n.iter=5,cor="poly") # f1 <- fa(psychTools::ability,n.iter=4) # f1p <- fa(psychTools::ability,n.iter=4,cor="tet") } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{multivariate} psych/man/describe.by.Rd0000644000176200001440000000651613724466373014662 0ustar liggesusers\name{describeBy} \alias{describeBy} \alias{describe.by} \title{ Basic summary statistics by group} \description{Report basic summary statistics by a grouping variable. Useful if the grouping variable is some experimental variable and data are to be aggregated for plotting. Partly a wrapper for by and \code{\link{describe}} } \usage{ describeBy(x, group=NULL,mat=FALSE,type=3,digits=15,data,...) describe.by(x, group=NULL,mat=FALSE,type=3,...) # deprecated } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{a data.frame or matrix. See note for statsBy. } \item{group}{a grouping variable or a list of grouping variables. (may be ignored if calling using the formula mode.)} \item{mat}{provide a matrix output rather than a list} \item{type}{Which type of skew and kurtosis should be found} \item{digits}{When giving matrix output, how many digits should be reported?} \item{data}{Needed if using formula input} \item{...}{parameters to be passed to describe} } \details{To get descriptive statistics for several different grouping variables, make sure that group is a list. In the case of matrix output with multiple grouping variables, the grouping variable values are added to the output. As of July, 2020, the grouping variable(s) may be specified in formula mode (see the examples). The type parameter specifies which version of skew and kurtosis should be found. See \code{\link{describe}} for more details. An alternative function (\code{\link{statsBy}}) returns a list of means, n, and standard deviations for each group. This is particularly useful if finding weighted correlations of group means using \code{\link{cor.wt}}. More importantly, it does a proper within and between group decomposition of the correlation. \code{\link{cohen.d}} will work for two groups. It converts the data into mean differences and pools the within group standard deviations. Returns cohen.d statistic as well as the multivariate generalization (Mahalanobis D). } \value{ A data.frame of the relevant statistics broken down by group: \cr item name \cr item number \cr number of valid cases\cr mean\cr standard deviation\cr median\cr mad: median absolute deviation (from the median) \cr minimum\cr maximum\cr skew\cr standard error\cr } \author{ William Revelle} \seealso{ \code{\link{describe}}, \code{\link{statsBy}}, \code{\link{densityBy}} and \code{\link{violinBy}}, \code{\link{cohen.d}}, \code{\link{cohen.d.by}}, and \code{\link{cohen.d.ci}} as well as \code{\link{error.bars}} and \code{\link{error.bars.by}} for other graphical displays. } \examples{ data(sat.act) describeBy(sat.act,sat.act$gender) #just one grouping variable describeBy(sat.act ~ gender) #describe the entire set formula input describeBy(SATV + SATQ ~ gender,data =sat.act) #specify the data set if using formula #describeBy(sat.act,list(sat.act$gender,sat.act$education)) #two grouping variables describeBy(sat.act ~ gender + education) #two grouping variables des.mat <- describeBy(age ~ education,mat=TRUE,data = sat.act) #matrix (data.frame) output des.mat <- describeBy(age ~ education + gender, data=sat.act, mat=TRUE,digits=2) #matrix output rounded to 2 decimals } \keyword{ models }% at least one, from doc/KEYWORDS \keyword{ univar }% __ONLY ONE__ keyword per line psych/man/multi.hist.Rd0000644000176200001440000000620114546077151014553 0ustar liggesusers\name{multi.hist} \alias{multi.hist} \alias{histo.density} \alias{histBy} \title{ Multiple histograms with density and normal fits on one page} \description{Given a matrix or data.frame, produce histograms for each variable in a "matrix" form. Include normal fits and density distributions for each plot. The number of rows and columns may be specified, or calculated. May be used for single variables. } \usage{multi.hist(x,nrow=NULL,ncol=NULL,density=TRUE,freq=FALSE,bcol="white", dcol=c("black","black"),dlty=c("dashed","dotted"), main=NULL,mar=c(2,1,1,1),breaks=21,global=TRUE,...) histBy(x,var,group,data=NULL,density=TRUE,alpha=.5,breaks=21,col,xlab, main="Histograms by group",freq=FALSE,...) } \arguments{ \item{x}{ matrix or data.frame} \item{var}{The variable in x to plot in histBy} \item{group}{The name of the variable in x to use as the grouping variable} \item{data}{Needs to be specified if using formula input to histBy} \item{nrow}{number of rows in the plot} \item{ncol}{number of columns in the plot} \item{density}{density=TRUE, show the normal fits and density distributions} \item{freq}{freq=FALSE shows probability densities and density distribution, freq=TRUE shows frequencies} \item{bcol}{Color for the bars} \item{dcol}{The color(s) for the normal and the density fits. Defaults to black. } \item{dlty}{The line type (lty) of the normal and density fits. (specify the optional graphic parameter lwd to change the line size)} \item{main}{title for each panel will be set to the column name unless specified} \item{mar}{Specify the lower, left, upper and right hand side margin in lines -- set to be tighter than normal default of c(5,4,4,2) + .1 } \item{xlab}{Label for the x variable} \item{breaks}{The number of breaks in histBy (see hist)} \item{global}{If TRUE, use the same x-axis for all plots} \item{alpha}{The degree of transparency of the overlapping bars in histBy} \item{col}{A vector of colors in histBy (defaults to the rainbow)} \item{...}{additional graphic parameters (e.g., col)} } \details{This allows for quick summaries of multiple distributions. Particularly useful when examining the results of multiple-split halves that come from the \code{\link{reliability}} function. By default, will try to make a square plot with equal number of rows and columns. However, the number of columns and rows may be specified for a particular plot. } \author{ William Revelle } \seealso{ \code{\link{bi.bars}} for drawing pairwise histograms and \code{\link{scatterHist}} for bivariate scatter and histograms. \code{\link{densityBy}}, \code{\link{violinBy}} and \code{\link{violin}} for density plots.} \examples{ multi.hist(sat.act) multi.hist(sat.act,bcol="red") multi.hist(sat.act,dcol="blue") #make both lines blue multi.hist(sat.act,dcol= c("blue","red"),dlty=c("dotted", "solid")) multi.hist(sat.act,freq=TRUE) #show the frequency plot multi.hist(sat.act,nrow=2) histBy(sat.act,"SATQ","gender") #input by variable names histBy(SATQ~ gender, data=sat.act) #formula input } \keyword{ multivariate }% at least one, from doc/KEYWORDS \keyword{ hplot }% __ONLY ONE__ keyword per line psych/man/cortest.bartlett.Rd0000644000176200001440000000413213463341201015742 0ustar liggesusers\name{cortest.bartlett} \alias{cortest.bartlett} \title{Bartlett's test that a correlation matrix is an identity matrix } \description{Bartlett (1951) proposed that -ln(det(R)*(N-1 - (2p+5)/6) was distributed as chi square if R were an identity matrix. A useful test that residuals correlations are all zero. Contrast to the Kaiser-Meyer-Olkin test. } \usage{ cortest.bartlett(R, n = NULL,diag=TRUE) } \arguments{ \item{R}{A correlation matrix. (If R is not square, correlations are found and a warning is issued. } \item{n}{Sample size (if not specified, 100 is assumed).} \item{diag}{Will replace the diagonal of the matrix with 1s to make it a correlation matrix.} } \details{More useful for pedagogical purposes than actual applications. The Bartlett test is asymptotically chi square distributed. Note that if applied to residuals from factor analysis (\code{\link{fa}}) or principal components analysis (\code{\link{principal}}) that the diagonal must be replaced with 1s. This is done automatically if diag=TRUE. (See examples.) An Alternative way of testing whether a correlation matrix is factorable (i.e., the correlations differ from 0) is the Kaiser-Meyer-Olkin \code{\link{KMO}} test of factorial adequacy. } \value{ \item{chisq}{Assymptotically chisquare} \item{p.value }{Of chi square} \item{df}{The degrees of freedom} } \references{ Bartlett, M. S., (1951), The Effect of Standardization on a chi square Approximation in Factor Analysis, Biometrika, 38, 337-344. } \author{William Revelle} \seealso{ \code{\link{cortest.mat}}, \code{\link{cortest.normal}}, \code{\link{cortest.jennrich}}} \examples{ set.seed(42) x <- matrix(rnorm(1000),ncol=10) r <- cor(x) cortest.bartlett(r) #random data don't differ from an identity matrix #data(bfi) cortest.bartlett(psychTools::bfi[1:200,1:10]) #not an identity matrix f3 <- fa(Thurstone,3) f3r <- f3$resid cortest.bartlett(f3r,n=213,diag=FALSE) #incorrect cortest.bartlett(f3r,n=213,diag=TRUE) #correct (by default) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{ multivariate } psych/man/AUC.Rd0000644000176200001440000001744714215117411013064 0ustar liggesusers\name{AUC} \alias{AUC} \alias{auc} \alias{Specificity} \alias{Sensitivity} \title{Decision Theory measures of specificity, sensitivity, and d prime} \description{In many fields, decisions and outcomes are categorical even though the underlying phenomenon are probably continuous. E.g. students are accepted to graduate school or not, they finish or not. X-Rays are diagnosed as patients having cancer or not. Outcomes of such decisions are usually labeled as Valid Positives, Valid Negatives, False Positives and False Negatives. In hypothesis testing, False Positives are known as Type I errors, while False Negatives are Type II errors. The relationship between these four cells depends upon the correlation between the decision rule and the outcome as well as the level of evidence needed for a decision (the criterion). Signal Detection Theory and Decision Theory have a number of related measures of performance (accuracy = VP + VN), Sensitivity (VP/(VP + FN)), Specificity (1 - FP), d prime (d'), and the area under the Response Operating Characteristic Curve (AUC). More generally, these are examples of correlations based upon dichotomous data. \code{\link{AUC}} addresses some of these questions. } \usage{ AUC(t=NULL,BR=NULL,SR=NULL,Phi=NULL,VP=NULL,labels=NULL,plot="b",zero=TRUE,correct=.5, col=c("blue","red")) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{t}{a 4 x 1 vector or a 2 x2 table of TP, FP, FN, TN values (see below) May be counts or proportions.} \item{BR}{Base Rate of successful outcomes or actual symptom (if t is not specified)} \item{SR}{Selection Rate for candidates or diagnoses (if t is not specified)} \item{Phi}{The Phi correlation coefficient between the predictor and the outcome variable (if t is not specified)} \item{VP}{The number of Valid Positives (selected applicants who succeed; correct diagnoses).(if t and Phi are not specified)} \item{labels}{Names of variables 1 and 2} \item{plot}{"b" (both), "d" (decision theory), "a" (auc), or "n" neither} \item{zero}{If True, then the noise distribution is centered at zero} \item{correct}{Cell values of 0 are replaced with correct. (See \code{\link{tetrachoric}} for a discussion of why this is needed.)} \item{col}{The color choice for the VP and FP, defaults to =c("blue","red") but could be c("grey","black") if we want to avoid colors } } \value{ \item{phi}{Phi coefficient of the two by two table} \item{tetra}{Tetrachoric (latent) coefficient inferred from the two by two table} \item{r.bis}{Biserial correlation of continuous state of world with decision} \item{observed}{The observed input (as a check)} \item{probabilities}{Observed values/ total number of observations} \item{conditional}{prob / rowSums(prob)} \item{Accuracy}{percentage of True Positives + True Negatives} \item{Sensitivity}{VP/(VP + FN)} \item{Specificity}{VN/(FP + VN)} \item{d.prime}{difference of True Positives versus True Negatives} \item{beta}{ratio of ordinates at the decision point} } \details{The problem of making binary decisions about the state of the world is ubiquitous. We see this in Null Hypothesis Significance Testing (NHST), medical diagnoses, and selection for occupations. Variously known as NHST, Signal Detection Theory, clinical Assessment, or college admissions, all of these domains share the same two x two decision task. Although the underlying phenomena are probably continuous, a typical decision or diagnostic situation makes dichotomous decisions: Accept or Reject, correctly identified, incorrectly identified. In Signal Detection Theory, the world has two states: Noise versus Signal + Noise. The decision is whether there is a signal or not. In diagnoses, it is whether to diagnose an illness or not given some noisy signal (e.g., an X-Ray, a set of diagnostic tests). In college admissions, we accept some students and reject others. Four-Five years later we observe who "succeeds" or graduates. All of these decisions lead to four cells based upon a two x two categorization. Given the true state of the world is Positive or Negative, and a rater assigns positive or negative ratings, then the resulting two by two table has True (Valid) Positives and True (Valid) Negatives on the diagonal and False Positives and False Negatives off the diagonal. When expressed as percentages of the total, then Base Rates (BR) depend upon the state of the world, but Selection Ratios (SR) are under the control of the person making the decision and affect the number of False Positives and the number of Valid Positives. Given a two x two table of counts or percentages \cr \tabular{lllll}{ \tab \tab Decide + \tab Decide - \cr \tab True + \tab Valid Positive \tab False Negative \tab Base Rate \% \cr \tab True - \tab False Positive \tab Valid Negative \tab 1- Base Rate \cr \tab \tab Selection ratio \tab 1 - Selection ratio \tab (Total N) } Unfortunately, although this way of categorizing the data is typical in assessment (e.g., Wiggins 1973), and everything is expressed as percentages of the total, in some decision papers, VP are expressed as the ratio of VP to total positive decisions (e.g., Wickens, 1984). This requires dividing through by the column totals (and represented as VP* and FP* in the table below). The relationships implied by these data can be summarized as a \code{\link{phi}} or \code{\link{tetrachoric}} correlation between the raters and the world, or as a decision process with several alternative measures. If we make the assumption that the two dimensions are continuous and were artificially dichotomised, then the \code{\link{tetrachoric}} correlation is an estimate of the continuous correlation between these two latent dimensions. If we think of the data as truly representing two states e.g., vaccinated or not vaccinanated, dead or alive, then the \code{\link{phi}} coefficient is more appropriate. Sensitivity, Specificity, Accuracy, Area Under the Curve, and d' (d prime). These measures may be defined as \cr \tabular{llll}{ \tab Measure \tab Definition \cr \tab Sensitivity \tab VP/(VP+ FN) \cr \tab Specificity\tab VN/(FP + VN) \cr \tab Accuracy \tab VP + VN \cr \tab VP* \tab VP/(VP + FP) \cr \tab FP* \tab (FP/(VP + FP \cr \tab d' \tab z(VP*) - z(FP*) \cr \tab d' \tab sqrt(2) z(AUC) \cr \tab beta \tab prob(X/S)/(prob(X/N)) \cr } Although only one point is found, we can form a graphical display of VP versus FP as a smooth curve as a function of the decision criterion. The smooth curve assumes normality whereas the other merely are the two line segments between the points (0,0), (FP,VP), (1,1). The resulting correlation between the inferred continuous state of the world and the dichotomous decision process is a biserial correlation. When using table input, the values can be counts and thus greater than 1 or merely probabilities which should add up to 1. Base Rates and Selection Ratios are proportions and thus less than 1. } \author{William Revelle } \references{ Metz, C.E. (1978) Basic principles of ROC analysis. Seminars in Nuclear Medicine, 8, 283-298. Wiggins, Jerry S. (1973) Personality and Prediction: Principles of Personality Assessment. Addison-Wesley. Wickens, Christopher D. (1984) Engineering Psychology and Human Performance. Merrill. } \seealso{ \code{\link{phi}}, \code{\link{phi2tetra}} ,\code{\link{Yule}}, \code{\link{Yule.inv}} \code{\link{Yule2phi}}, \code{\link{tetrachoric}} and \code{\link{polychoric}}, \code{\link{comorbidity}}} \examples{ AUC(c(30,20,20,30)) #specify the table input AUC(c(140,60,100,900)) #Metz example with colors AUC(c(140,60,100,900),col=c("grey","black")) #Metz example 1 no colors AUC(c(80,120,40, 960)) #Metz example 2 Note how the accuracies are the same but d's differ AUC(c(49,40,79,336)) #Wiggins p 249 AUC(BR=.05,SR=.254,Phi = .317) #Wiggins 251 extreme Base Rates } \keyword{multivariate } psych/man/cortest.mat.Rd0000644000176200001440000001205514527725720014723 0ustar liggesusers\name{cortest} \alias{cortest.mat} \alias{cortest.normal} \alias{cortest} \alias{cortest.jennrich} \title{Chi square tests of whether a single matrix is an identity matrix, or a pair of matrices are equal. } \description{Steiger (1980) pointed out that the sum of the squared elements of a correlation matrix, or the Fisher z score equivalents, is distributed as chi square under the null hypothesis that the values are zero (i.e., elements of the identity matrix). This is particularly useful for examining whether correlations in a single matrix differ from zero or for comparing two matrices. Jennrich (1970) also examined tests of differences between matrices. } \usage{ cortest(R1,R2=NULL,n1=NULL,n2 = NULL, fisher = TRUE,cor=TRUE,method="pearson", use ="pairwise") #same as cortest.normal this does the steiger test cortest.normal(R1, R2 = NULL, n1 = NULL, n2 = NULL, fisher = TRUE) #the steiger test cortest.jennrich(R1,R2,n1=NULL, n2=NULL) #the Jennrich test cortest.mat(R1,R2=NULL,n1=NULL,n2 = NULL) #an alternative test } \arguments{ \item{R1}{A correlation matrix. (If R1 is not rectangular, and cor=TRUE, the correlations are found). } \item{R2}{A correlation matrix. If R2 is not rectangular, and cor=TRUE, the correlations are found. If R2 is NULL, then the test is just whether R1 is an identity matrix. } \item{n1}{Sample size of R1 } \item{n2}{Sample size of R2 } \item{fisher}{Fisher z transform the correlations? } \item{cor}{By default, if the input matrices are not symmetric, they are converted to correlation matrices. That is, they are treated as if they were the raw data. If cor=FALSE, then the input matrices are taken to be correlation matrices.} \item{method}{Which type of correlation to find ("pearson", "spearman","kendall")} \item{use}{How to handle missing data (defaults to pairwise)} } \details{There are several ways to test if a matrix is the identity matrix. The most well known is the chi square test of Bartlett (1951) and Box (1949). A very straightforward test, discussed by Steiger (1980) is to find the sum of the squared correlations or the sum of the squared Fisher transformed correlations. Under the null hypothesis that all the correlations are equal, this sum is distributed as chi square. This is implemented in \code{\link{cortest}} and \code{\link{cortest.normal}} Yet another test, is the Jennrich(1970) test of the equality of two matrices. This compares the differences between two matrices to the averages of two matrices using a chi square test. This is implemented in \code{\link{cortest.jennrich}}. Yet another option \code{\link{cortest.mat}} is to compare the two matrices using an approach analogous to that used in evaluating the adequacy of a factor model. In factor analysis, the maximum likelihood fit statistic is \cr \eqn{f = log(trace ((FF'+U2)^{-1} R) - log(|(FF'+U2)^{-1} R|) - n.items}{f = log(trace ((FF'+U2)^{-1} R) - log(|(FF'+U2)^-1 R|) - n.items}. This in turn is converted to a chi square \eqn{\chi^2 = (n.obs - 1 - (2 * p + 5)/6 - (2 * factors)/3)) * f }{chi^2 = (n.obs - 1 - (2 * p + 5)/6 - (2 * factors)/3)) * f } (see \code{\link{fa}}.) That is, the model (M = FF' + U2) is compared to the original correlation matrix (R) by a function of \eqn{M^{-1} R}. By analogy, in the case of two matrices, A and B, \code{\link{cortest.mat}} finds the chi squares associated with \eqn{A^{-1}B} and \eqn{A B^{-1}}. The sum of these two \eqn{\chi^2} will also be a \eqn{\chi^2} but with twice the degrees of freedom. } \value{ \item{chi2}{The chi square statistic} \item{df}{Degrees of freedom for the Chi Square} \item{prob}{The probability of observing the Chi Square under the null hypothesis.} } \references{ Steiger, James H. (1980) Testing pattern hypotheses on correlation matrices: alternative statistics and some empirical results. Multivariate Behavioral Research, 15, 335-352. Jennrich, Robert I. (1970) An Asymptotic \eqn{\chi^2} Test for the Equality of Two Correlation Matrices. Journal of the American Statistical Association, 65, 904-912. } \author{ William Revelle } \note{ Both the cortest.jennrich and cortest.normal are probably overly stringent. The ChiSquare values for pairs of random samples from the same population are larger than would be expected. This is a good test for rejecting the null of no differences. } \seealso{\code{\link{cortest.bartlett}} \code{\link{corr.test}}} \examples{ set.seed(42) x <- matrix(rnorm(1000),ncol=10) cortest.normal(x) #just test if this matrix is an identity #now create two correlation matrices that should be equal x <- sim.congeneric(loads =c(.9,.8,.7,.6,.5),N=1000,short=FALSE) y <- sim.congeneric(loads =c(.9,.8,.7,.6,.5),N=1000,short=FALSE) cortest(x$r,y$r,n1=1000,n2=1000) #The Steiger test cortest.jennrich(x$r,y$r,n1=100,n2=1000) # The Jennrich test cortest.mat(x$r,y$r,n1=1000,n2=1000) #twice the degrees of freedom as the Jennrich #create a new matrix that is different z <- sim.congeneric(loads=c(.8,.8,.7,.7, .6), N= 1000, short=FALSE) cortest(x$r,z$r,n1=1000) #these should be different } \keyword{ multivariate } psych/man/cor.ci.Rd0000644000176200001440000001500613715124051013620 0ustar liggesusers\name{corCi} \alias{corCi} \alias{cor.ci} \title{Bootstrapped and normal confidence intervals for raw and composite correlations} \description{Although normal theory provides confidence intervals for correlations, this is particularly problematic with Synthetic Aperture Personality Assessment (SAPA) data where the individual items are Massively Missing at Random. Bootstrapped confidence intervals are found for Pearson, Spearman, Kendall, tetrachoric, or polychoric correlations and for scales made from those correlations. If given a correlation matrix and sample size(s), normal theory confidence intervals are provided. } \usage{ corCi(x, keys = NULL, n.iter = 100, p = 0.05,overlap = FALSE, poly = FALSE, method = "pearson", plot=TRUE,minlength=5,n=NULL,...) cor.ci(x, keys = NULL, n.iter = 100, p = 0.05,overlap = FALSE, poly = FALSE, method = "pearson", plot=TRUE,minlength=5,n=NULL,...) } \arguments{ \item{x}{The raw data, or a correlation matrix if not doing bootstrapping} \item{keys}{If NULL, then the confidence intervals of the raw correlations are found. Otherwise, composite scales are formed from the keys applied to the correlation matrix (in a logic similar to \code{\link{cluster.cor}} but without the bells and whistles) and the confidence of those composite scales intercorrelations. } \item{n.iter}{The number of iterations to bootstrap over. This will be very slow if using tetrachoric/or polychoric correlations. } \item{p}{The upper and lower confidence region will include 1-p of the distribution.} \item{overlap}{If true, the correlation between overlapping scales is corrected for item overlap.} \item{poly}{if FALSE, then find the correlations using the method specified (defaults to Pearson). If TRUE, the polychoric correlations will be found (slowly). Because the polychoric function uses multicores (if available), and corCi does as well, the number of cores used is options("mc.cores")^2. } \item{method}{"pearson","spearman", "kendall"} \item{plot}{Show the correlation plot with correlations scaled by the probability values. To show the matrix in terms of the confidence intervals, use \code{\link{cor.plot.upperLowerCi}}.} \item{minlength}{What is the minlength to use in abbreviations of the cis? Defaults to 5} \item{n}{If finding confidence intervals from a correlation matrix, specify the n} \item{...}{Other parameters for axis (e.g., cex.axis to change the font size, srt to rotate the numbers in the plot)} } \details{ If given a correlation matrix, then confidence intervals are found based upon the sample sizes using the conventional r2z fisher transformation (\code{\link{fisherz}} and the normal distribution. If given raw data, correlations are found. If keys are specified (the normal case), then composite scales based upon the correlations are found and reported. This is the same procedure as done using \code{\link{cluster.cor}} or \code{\link{scoreItems}}. Then (with raw data) the data are recreated n.iter times by sampling subjects (rows) with replacement and the correlations (and composite scales) are found again (and again and again). Mean and standard deviations of these values are calculated based upon the Fisher Z transform of the correlations. Summary statistics include the original correlations and their confidence intervals. For those who want the complete set of replications, those are available as an object in the resulting output. Although particularly useful for SAPA (\url{https://www.sapa-project.org/}) type data where we have lots of missing data, this will work for any normal data set as well. Although the correlations are shown automatically as a \code{\link{cor.plot}}, it is possible to show the upper and lower confidence intervals by using \code{\link{cor.plot.upperLowerCi}}. This will also return, invisibly, a matrix for printing with the lower and upper bounds of the correlations shown below and above the diagonal (see the first example). } \value{ \item{rho }{The original (composite) correlation matrix. } \item{means }{Mean (of Fisher transformed) correlation retransformed back to the r units} \item{sds}{Standard deviation of Fisher transformed correlations} \item{ci}{Mean +/- alpha/2 of the z scores as well as the alpha/2 and 1-alpha/2 quantiles. These are labeled as lower.emp(ircal), lower.norm(al), upper.norm and upper.emp.} \item{replicates}{The observed replication values so one can do one's own estimates} } \references{For SAPA type data, see Revelle, W., Wilt, J., and Rosenthal, A. (2010) Personality and Cognition: The Personality-Cognition Link. In Gruszka, A. and Matthews, G. and Szymura, B. (Eds.) Handbook of Individual Differences in Cognition: Attention, Memory and Executive Control, Springer. } \author{William Revelle} \seealso{\code{\link{make.keys}}, \code{\link{cluster.cor}}, and \code{\link{scoreItems}} for forming synthetic correlation matrices from composites of item correlations. See \code{\link{scoreOverlap}} for correcting for item overlap in scales. See also \code{\link{corr.test}} for standard significance testing of correlation matrices. See also \code{\link{lowerCor}} for finding and printing correlation matrices, as well as \code{\link{lowerMat}} for displaying them. Also see \code{\link{cor.plot.upperLowerCi}} for displaying the confidence intervals graphically. } \examples{ #find confidence intervals of a correlation matrix with specified sample size ci <- corCi(Thurstone[1:6,1:6],n=213) ci #show them R <- cor.plot.upperLowerCi(ci) #show them graphically R #show them as a matrix #confidence intervals by bootstrapping requires raw data corCi(psychTools::bfi[1:200,1:10]) # just the first 10 variables #The keys have overlapping scales keys <- list(agree=c("-A1","A2","A3","A4","A5"), conscientious= c("C1", "C2","C3","-C4","-C5"),extraversion=c("-E1","-E2","E3","E4","E5"), neuroticism= c("N1", "N2", "N3","N4","N5"), openness = c("O1","-O2","O3","O4","-O5"), alpha=c("-A1","A2","A3","A4","A5","C1","C2","C3","-C4","-C5","N1","N2","N3","N4","N5"), beta = c("-E1","-E2","E3","E4","E5","O1","-O2","O3","O4","-O5") ) #do not correct for item overlap rci <- corCi(psychTools::bfi[1:200,],keys,n.iter=10,main="correlation with overlapping scales") #also shows the graphic -note the overlap #correct for overlap rci <- cor.ci(psychTools::bfi[1:200,],keys,overlap=TRUE, n.iter=10,main="Correct for overlap") #show the confidence intervals ci <- cor.plot.upperLowerCi(rci) #to show the upper and lower confidence intervals ci #print the confidence intervals in matrix form } \keyword{multivariate } \keyword{ models }psych/man/faRotate.Rd0000644000176200001440000000543714214370646014227 0ustar liggesusers\name{faRotations} \alias{faRotations} \title{Multiple rotations of factor loadings to find local minima} \description{A dirty little secret of factor rotation algorithms is the problem of local minima (Nguyen and Waller,2022). Following ideas in that article, we allow for multiple random restarts and then return the global optimal solution. Used as part of the \code{\link{fa}} function or available as a stand alone function. } \usage{ faRotations(loadings, r = NULL, rotate = "oblimin", hyper = 0.15, n.rotations = 10,...) } \arguments{ \item{loadings}{Factor loadings matrix from \code{\link{fa}} or \code{\link{pca}} or any N x k loadings matrix} \item{r}{The correlation matrix used to find the factors. (Used to find the factor indeterminancy of the solution)} \item{rotate}{"none", "varimax", "quartimax", "bentlerT", "equamax", "varimin", "geominT" and "bifactor" are orthogonal rotations. "Promax", "promax", "oblimin", "simplimax", "bentlerQ, "geominQ" and "biquartimin" and "cluster" are possible oblique transformations of the solution. Defaults to oblimin. } \item{hyper}{The value defining when a loading is in the ``hyperplane".} \item{n.rotations}{The number of random restarts to use.} \item{...}{additional parameters, specifically, keys may be passed if using the target rotation, or delta if using geominQ, or whether to normalize if using Varimax} } \details{Nguyen and Waller review the problem of local minima in factor analysis. This is a problem for all rotation algorithms, but is more so for some. \code{\link{faRotate}} generates n.rotations different starting values and then applies the specified rotation to the original loadings using multiple start values. Hyperplane counts and complexity indices are reported for each starting matrix, and the one with the highest hyoerplane count and the lowest complexity is returned. } \value{ \item{loadings}{The best rotated solution} \item{Phi}{Factor correlations} \item{rotation.stats}{Hyperplane count, complexity.} \item{rot.mat}{The rotation matrix used.} } \references{ Nguyen, H. V., & Waller, N. G. (2022, January 6). Local Minima and Factor Rotations in Exploratory Factor Analysis. Psychological Methods. Advance online publication. doi 10.1037/met0000467 } \author{William Revelle } \note{Adapted from the fungible package by Waller } %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{\code{\link{fa}} } \examples{ f5 <- fa(psychTools::bfi[,1:25],5,rotate="none") faRotations(f5,n.rotations=10) #note that the factor analysis needs to not do the rotation faRotations(f5$loadings) #matrix input geo <- faRotations(f5,rotate="geominQ",n.rotation=10) # a popular alternative, but more sensitive to local minima describe(geo$rotation.stats[,1:3]) } \keyword{ multivariate } \keyword{ models} psych/man/Gorsuch.Rd0000644000176200001440000000210011546641041014047 0ustar liggesusers\name{Gorsuch} \alias{Gorsuch} \docType{data} \title{Example data set from Gorsuch (1997) for an example factor extension. } \description{ Gorsuch (1997) suggests an alternative to the classic Dwyer (1937) factor extension technique. This data set is taken from that article. Useful for comparing \code{link{fa.extension}} with and without the correct=TRUE option. } \usage{data(Gorsuch)} \details{Gorsuc (1997) suggested an alternative model for factor extension. His method is appropriate for the case of repeated variables. This is handled in \code{link{fa.extension}} with correct=FALSE } \source{Richard L. Gorsuch (1997) New Procedure for Extension Analysis in Exploratory Factor Analysis. Educational and Psychological Measurement, 57, 725-740. } \references{ Dwyer, Paul S. (1937), The determination of the factor loadings of a given test from the known factor loadings of other tests. Psychometrika, 3, 173-178 } \examples{ data(Gorsuch) Ro <- Gorsuch[1:6,1:6] Roe <- Gorsuch[1:6,7:10] fo <- fa(Ro,2,rotate="none") fa.extension(Roe,fo,correct=FALSE) } \keyword{datasets} psych/man/fa.diagram.Rd0000644000176200001440000002205714430250133014434 0ustar liggesusers\name{fa.diagram} \alias{fa.graph} \alias{fa.rgraph} \alias{fa.diagram} \alias{extension.diagram} \alias{het.diagram} \title{ Graph factor loading matrices} \description{Factor analysis or principal components analysis results are typically interpreted in terms of the major loadings on each factor. These structures may be represented as a table of loadings or graphically, where all loadings with an absolute value > some cut point are represented as an edge (path). \code{\link{fa.diagram}} uses the various \code{\link{diagram}} functions to draw the diagram. \code{\link{fa.graph}} generates dot code for external plotting. \code{\link{fa.rgraph}} uses the Rgraphviz package (if available) to draw the graph. \code{\link{het.diagram}} will draw "heterarchy" diagrams of factor/scale solutions at different levels. All of these as well as some additional functions may be called by \code{\link{diagram}} which is a wrapper to the specific functions. } \usage{ fa.diagram(fa.results,Phi=NULL,fe.results=NULL,sort=TRUE,labels=NULL,cut=.3, simple=TRUE, errors=FALSE,g=FALSE,digits=1,e.size=.05,rsize=.15,side=2, main,cex=NULL,l.cex=NULL, marg=c(.5,.5,1,.5),adj=1,ic=FALSE, ...) het.diagram(r,levels,cut=.3,digits=2,both=TRUE, main="Heterarchy diagram",l.cex,gap.size,...) extension.diagram(fa.results,Phi=NULL,fe.results=NULL,sort=TRUE,labels=NULL,cut=.3, f.cut, e.cut=.1,simple=TRUE,e.simple=FALSE,errors=FALSE,g=FALSE, digits=1,e.size=.05,rsize=.15,side=2,main,cex=NULL, e.cex=NULL, marg=c(.5,.5,1,.5),adj=1,ic=FALSE, ...) fa.graph(fa.results,out.file=NULL,labels=NULL,cut=.3,simple=TRUE, size=c(8,6), node.font=c("Helvetica", 14), edge.font=c("Helvetica", 10), rank.direction=c("RL","TB","LR","BT"), digits=1,main="Factor Analysis", ...) fa.rgraph(fa.results,out.file=NULL,labels=NULL,cut=.3,simple=TRUE, size=c(8,6), node.font=c("Helvetica", 14), edge.font=c("Helvetica", 10), rank.direction=c("RL","TB","LR","BT"), digits=1,main="Factor Analysis",graphviz=TRUE, ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{fa.results}{The output of factor analysis, principal components analysis, or ICLUST analysis. May also be a factor loading matrix from anywhere.} \item{Phi}{Normally not specified (it is is found in the FA, pc, or ICLUST, solution), this may be given if the input is a loadings matrix.} \item{fe.results}{the results of a factor extension analysis (if any)} \item{out.file}{ If it exists, a dot representation of the graph will be stored here (fa.graph)} \item{labels}{ Variable labels } \item{cut}{ Loadings with abs(loading) > cut will be shown } \item{f.cut}{factor correlations > f.cut will be shown} \item{e.cut}{extension loadings with abs(loading) > e.cut will be shown} \item{simple}{Only the biggest loading per item is shown} \item{e.simple}{Only the biggest loading per extension item is shown } \item{g}{Does the factor matrix reflect a g (first) factor. If so, then draw this to the left of the variables, with the remaining factors to the right of the variables. It is useful to turn off the simple parameter in this case.} \item{ic}{If drawing a cluster analysis result, should we treat it as latent variable model (ic=FALSE) or as an observed variable model (ic=TRUE) } \item{r}{A correlation matrix for the het.diagram function} \item{levels}{A list of the elements in each level} \item{both}{Should arrows have double heads (in het.diagram)} \item{size}{graph size } \item{sort}{sort the factor loadings before showing the diagram} \item{errors}{include error estimates (as arrows)} \item{e.size}{size of ellipses} \item{rsize}{size of rectangles} \item{side}{on which side should error arrows go?} \item{cex}{modify font size} \item{e.cex}{Modify the font size for the Dependent variables, defaults to cex} \item{l.cex}{modify the font size in arrows, defaults to cex} \item{gap.size}{The gap in the arrow for the label. Can be adjusted to compensate for variations in cex or l.cex} \item{marg}{sets the margins to be wider than normal, returns them to the normal size upon exit} \item{adj}{how many different positions (1-3) should be used for the numeric labels. Useful if they overlap each other.} \item{node.font}{what font should be used for nodes in fa.graph } \item{edge.font}{what font should be used for edges in fa.graph } \item{rank.direction}{ parameter passed to Rgraphviz-- which way to draw the graph } \item{digits}{ Number of digits to show as an edgelable } \item{main}{ Graphic title, defaults to "factor analyis" or "factor analysis and extension" } \item{graphviz}{Should we try to use Rgraphviz for output?} \item{\dots}{ other parameters } } \details{Path diagram representations have become standard in confirmatory factor analysis, but are not yet common in exploratory factor analysis. Representing factor structures graphically helps some people understand the structure. By default the arrows come from the latent variables to the observed variables. This is, of course, the factor model. However, if the class of the object to be drawn is 'principal', then reverse the direction of the arrows, and the 'latent' variables are no longer latent, and are shown as boxes. For cluster models, the default is to treat them as factors, but if ic =TRUE, then we treat it as a components model. fa.diagram does not use Rgraphviz and is the preferred function. fa.graph generates dot code to be used by an external graphics program. It does not have all the bells and whistles of fa.diagram, but these may be done in the external editor. Hierarchical (bifactor) models may be drawn by specifying the g parameter as TRUE. This allows for an graphical displays of various factor transformations with a bifactor structure (e.g., \code{\link{bifactor}} and \code{\link{biquartimin}}. See \code{\link{omega}} for an alternative way to find these structures. The \code{\link{het.diagram}} function will show the case of a hetarchical structure at multiple levels. It can also be used to show the patterns of correlations between sets of scales (e.g., EPI, NEO, BFI). The example is for showing the relationship between 3 sets of 4 variables from the Thurstone data set. The parameters l.cex and gap.size are used to adjust the font size of the labels and the gap in the lines. \code{\link{extension.diagram}} will draw a \code{\link{fa.extend}} result with slightly more control than using \code{\link{fa.diagram}} or the more generic \code{\link{diagram}} function. In fa.rgraph although a nice graph is drawn for the orthogonal factor case, the oblique factor drawing is acceptable, but is better if cleaned up outside of R or done using fa.diagram. The normal input is taken from the output of either \code{\link{fa}} or \code{\link{ICLUST}}. This latter case displays the ICLUST results in terms of the cluster loadings, not in terms of the cluster structure. Actually an interesting option. It is also possible to just give a factor loading matrix as input. In this case, supplying a Phi matrix of factor correlations is also possible. It is possible, using fa.graph, to export dot code for an omega solution. fa.graph should be applied to the schmid$sl object with labels specified as the rownames of schmid$sl. The results will need editing to make fully compatible with dot language plotting. To specify the model for a structural equation confirmatory analysis of the results, use \code{\link{structure.diagram}} instead. } \value{fa.diagram: A path diagram is drawn without using Rgraphviz. This is probably the more useful function. fa.rgraph: A graph is drawn using rgraphviz. If an output file is specified, the graph instructions are also saved in the dot language. fa.graph: the graph instructions are saved in the dot language. } \author{William Revelle } \note{ fa.rgraph requires Rgraphviz. Because there are occasional difficulties installing Rgraphviz from Bioconductor in that some libraries are misplaced and need to be relinked, it is probably better to use fa.diagram or fa.graph. } \seealso{ \code{\link{diagram}}, \code{\link{omega.graph}}, \code{\link{ICLUST.graph}}, \code{\link{bassAckward.diagram}}. \code{\link{structure.diagram}} will convert the factor diagram to sem modeling code. } \examples{ test.simple <- fa(item.sim(16),2,rotate="oblimin") #if(require(Rgraphviz)) {fa.graph(test.simple) } fa.diagram(test.simple) f3 <- fa(Thurstone,3,rotate="cluster") fa.diagram(f3,cut=.4,digits=2) f3l <- f3$loadings fa.diagram(f3l,main="input from a matrix") Phi <- f3$Phi fa.diagram(f3l,Phi=Phi,main="Input from a matrix") fa.diagram(ICLUST(Thurstone,2,title="Two cluster solution of Thurstone"),main="Input from ICLUST") het.diagram(Thurstone,levels=list(1:4,5:8,3:7)) #show how caffiene increases arousal and tension select <- c("active" , "alert" , "aroused", "sleepy", "tired" , "drowsy" , "anxious", "jittery" ,"nervous", "calm" , "relaxed" ,"at.ease" ,"gender", "drug") msq.small <- psychTools::msqR[select] fe <- fa.extend(msq.small, 2,1:12,14) extension.diagram(fe, ic=TRUE) #drug causes arousal } \keyword{ multivariate } \keyword{hplot} psych/man/predicted.validity.Rd0000644000176200001440000000573414524267352016254 0ustar liggesusers\name{predicted.validity} \alias{predicted.validity} \alias{item.validity} \alias{validityItem} \title{Find the predicted validities of a set of scales based on item statistics} \description{The validity of a scale varies as a function of the number of items in the scale, their average intercorrelation, and their average validity. The asymptotic limit of a scales validity for any particular criterion is just the average validity divided by the square root of the average within scale item correlation. \code{\link{predicted.validity}} will find the predicted validity for a set of scales (defined by a keys.list) and the average item validity for various criteria. The function will find (and report) scale reliabilities (using \code{\link{reliability}}) and average item validities (using \code{\link{item.validity}}) } \usage{ predicted.validity(x, criteria, keys, scale.rel = NULL, item.val = NULL) item.validity(x,criteria,keys) validityItem(x,criteria,keys) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{A data set} \item{criteria}{Variables to predict from the scales } \item{keys}{A keys.list that defines the scales } \item{scale.rel}{If not specified, these will be found. Otherwise, this is the output from \code{\link{reliability}}. } \item{item.val}{If not specified, the average item validities for each scale will be found. Otherwise use the output from \code{\link{item.validity}} } } \details{ When predicting criteria from a set of items formed into scales, the validity of the scale (that is, the correlations of the scale with each criteria) is a function of the average item validity (r_y), the average intercorrelation of the items in the scale (r_x), and the number of items in the scale (n). The limit of validity is r_y/sqrt(r_x). Criteria will differ in their predictability from a set of scales. These asymptotic values may be used to help the decision on which scales to develop further. } \value{ \item{predicted}{The predicted validities given the scales specified} \item{item.validities }{The average item validities for each scale with each criterion} \item{scale.reliabilities}{The various statistics reported by the \code{\link{reliability}} function} \item{asymptotic}{A matrix of the asymptotic validities} } \references{ Revelle, William. (in prep) An introduction to psychometric theory with applications in R. Springer. Working draft available at \url{https://personality-project.org/r/book/} Revelle, W. and Condon, D.M. (2019) Reliability from alpha to omega: A tutorial. Psychological Assessment, 31, 12, 1395-1411. https://doi.org/10.1037/pas0000754. \url{https://osf.io/preprints/psyarxiv/2y3w9} Preprint available from PsyArxiv } \author{William Revelle} \seealso{ \code{\link{reliability}}, \code{\link{scoreItems}}, \code{\link{scoreFast}} } \examples{ pred.bfi <- predicted.validity(psychTools::bfi[,1:25], psychTools::bfi[,26:28], psychTools::bfi.keys) pred.bfi } \keyword{ multivariate } \keyword{ models }psych/man/error.crosses.Rd0000755000176200001440000000770413463662135015277 0ustar liggesusers\name{error.crosses} \alias{error.crosses} \title{ Plot x and y error bars } \description{Given two vectors of data (X and Y), plot the means and show standard errors in both X and Y directions. } \usage{ error.crosses(x,y,labels=NULL,main=NULL,xlim=NULL,ylim= NULL, xlab=NULL,ylab=NULL,pos=NULL,offset=1,arrow.len=.2,alpha=.05,sd=FALSE,add=FALSE, colors=NULL,col.arrows=NULL,col.text=NULL,...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{ A vector of data or summary statistics (from Describe) } \item{y}{ A second vector of data or summary statistics (also from Describe)} \item{labels}{the names of each pair -- defaults to rownames of x } \item{main}{The title for the graph} \item{xlim}{xlim values if desired-- defaults to min and max mean(x) +/- 2 se} \item{ylim}{ylim values if desired -- defaults to min and max mean(y) +/- 2 se} \item{xlab}{label for x axis -- grouping variable 1} \item{ylab}{label for y axis -- grouping variable 2} \item{pos}{Labels are located where with respect to the mean? } \item{offset}{Labels are then offset from this location} \item{arrow.len}{ Arrow length } \item{alpha}{alpha level of error bars } \item{sd}{if sd is TRUE, then draw means +/- 1 sd)} \item{add}{if TRUE, overlay the values with a prior plot} \item{colors}{What color(s) should be used for the plot character? Defaults to black} \item{col.arrows}{What color(s) should be used for the arrows -- defaults to colors} \item{col.text}{What color(s) should be used for the text -- defaults to colors} \item{\dots}{ Other parameters for plot } } \details{For an example of two way error bars describing the effects of mood manipulations upon positive and negative affect, see \url{https://personality-project.org/revelle/publications/happy-sad-appendix/FIG.A-6.pdf} The second example shows how error crosses can be done for multiple variables where the grouping variable is found dynamically. The \code{\link{errorCircles}} example shows how to do this in one step. } \author{ William Revelle \cr \email{revelle@northwestern.edu} } \seealso{To draw error bars for single variables \code{\link{error.bars}}, or by groups \code{\link{error.bars.by}}, or to find descriptive statistics \code{\link{describe}} or descriptive statistics by a grouping variable \code{\link{describeBy}} and \code{\link{statsBy}}. A much improved version is now called \code{\link{errorCircles}}. } \examples{ #just draw one pair of variables desc <- describe(attitude) x <- desc[1,] y <- desc[2,] error.crosses(x,y,xlab=rownames(x),ylab=rownames(y)) #now for a bit more complicated plotting data(psychTools::bfi) desc <- describeBy(psychTools::bfi[1:25],psychTools::bfi$gender) #select a high and low group error.crosses(desc$'1',desc$'2',ylab="female scores", xlab="male scores",main="BFI scores by gender") abline(a=0,b=1) #do it from summary statistics (using standard errors) g1.stats <- data.frame(n=c(10,20,30),mean=c(10,12,18),se=c(2,3,5)) g2.stats <- data.frame(n=c(15,20,25),mean=c(6,14,15),se =c(1,2,3)) error.crosses(g1.stats,g2.stats) #Or, if you prefer to draw +/- 1 sd. instead of 95% confidence g1.stats <- data.frame(n=c(10,20,30),mean=c(10,12,18),sd=c(2,3,5)) g2.stats <- data.frame(n=c(15,20,25),mean=c(6,14,15),sd =c(1,2,3)) error.crosses(g1.stats,g2.stats,sd=TRUE) #and seem even fancy plotting: This is taken from a study of mood #four films were given (sad, horror, neutral, happy) #with a pre and post test data(psychTools::affect) colors <- c("black","red","green","blue") films <- c("Sad","Horror","Neutral","Happy") affect.mat <- describeBy(psychTools::affect[10:17],psychTools::affect$Film,mat=TRUE) error.crosses(affect.mat[c(1:4,17:20),],affect.mat[c(5:8,21:24),], labels=films[affect.mat$group1],xlab="Energetic Arousal", ylab="Tense Arousal",colors = colors[affect.mat$group1],pch=16,cex=2) } \keyword{ multivariate }% at least one, from doc/KEYWORDS \keyword{ hplot }% __ONLY ONE__ keyword per line psych/man/block.random.Rd0000644000176200001440000000232314214767310015020 0ustar liggesusers\name{block.random} \alias{block.random} \title{Create a block randomized structure for n independent variables} \description{Random assignment of n subjects with an equal number in all of N conditions may done by block randomization, where the block size is the number of experimental conditions. The number of Independent Variables and the number of levels in each IV are specified as input. The output is a the block randomized design. } \usage{ block.random(n, ncond = NULL) } \arguments{ \item{n}{The number of subjects to randomize. Must be a multiple of the number of experimental conditions} \item{ncond}{The number of conditions for each IV. Defaults to 2 levels for one IV. If more than one IV, specify as a vector. If names are provided, they are used, otherwise the IVs are labeled as IV1 ... IVn} } \value{ \item{blocks }{A matrix of subject numbers, block number, and randomized levels for each IV} } \author{William Revelle } \note{Prepared for a course on Research Methods in Psychology \url{https://personality-project.org/courses/205/205.syllabus.html} } \examples{ br <- block.random(n=24,c(2,3)) pairs.panels(br) br <- block.random(96,c(time=4,drug=3,sex=2)) pairs.panels(br) } \keyword{multivariate } psych/man/ICLUST.rgraph.Rd0000644000176200001440000000656713256544643014761 0ustar liggesusers\name{ICLUST.rgraph} \alias{ICLUST.rgraph} \title{ Draw an ICLUST graph using the Rgraphviz package } \description{Given a cluster structure determined by \code{\link{ICLUST}}, create a rgraphic directly using Rgraphviz. To create dot code to describe the \code{\link{ICLUST}} output with more precision, use \code{\link{ICLUST.graph}}. As an option, dot code is also generated and saved in a file. To use the dot code, use either https://www.graphviz.org/ Graphviz or a commercial viewer (e.g., OmniGraffle). } \usage{ ICLUST.rgraph(ic.results, out.file = NULL, min.size = 1, short = FALSE, labels = NULL, size = c(8, 6), node.font = c("Helvetica", 14), edge.font = c("Helvetica", 10), rank.direction=c("RL","TB","LR","BT"), digits = 2, title = "ICLUST",label.font=2, ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{ic.results}{output list from ICLUST } \item{out.file}{ File name to save optional dot code. } \item{min.size}{draw a smaller node (without all the information) for clusters < min.size -- useful for large problems} \item{short}{if short==TRUE, don't use variable names} \item{labels}{vector of text labels (contents) for the variables} \item{size}{size of output } \item{node.font}{ Font to use for nodes in the graph } \item{edge.font}{ Font to use for the labels of the arrows (edges)} \item{rank.direction}{LR or TB or RL } \item{digits}{ number of digits to show } \item{title}{ any title } \item{label.font}{The variable labels can be a different size than the other nodes. This is particularly helpful if the number of variables is large or the labels are long.} \item{\dots}{ other options to pass } } \details{ Will create (or overwrite) an output file and print out the dot code to show a cluster structure. This dot file may be imported directly into a dot viewer (e.g., https://www.graphviz.org/). The "dot" language is a powerful graphic description language that is particulary appropriate for viewing cluster output. Commercial graphics programs (e.g., OmniGraffle) can also read (and clean up) dot files. ICLUST.rgraph takes the output from \code{\link{ICLUST}} results and processes it to provide a pretty picture of the results. Original variables shown as rectangles and ordered on the left hand side (if rank direction is RL) of the graph. Clusters are drawn as ellipses and include the alpha, beta, and size of the cluster. Edges show the cluster intercorrelations. It is possible to trim the output to not show all cluster information. Clusters < min.size are shown as small ovals without alpha, beta, and size information. } \value{Output is a set of dot commands written either to console or to the output file. These commands may then be used as input to any "dot" viewer, e.g., Graphviz. ICLUST.rgraph is a version of \code{\link{ICLUST.graph}} that uses Rgraphviz to draw on the screen as well. Additional output is drawn to main graphics screen. } \references{ ICLUST: https://personality-project.org/r/r.ICLUST.html} \author{ \email{revelle@northwestern.edu } \cr \url{https://personality-project.org/revelle.html}} \seealso{ \code{\link{VSS.plot}}, \code{\link{ICLUST}}} \note{ Requires Rgraphviz} \examples{ test.data <- Harman74.cor$cov ic.out <- ICLUST(test.data) #uses iclust.diagram instead } \keyword{ multivariate} \keyword{ cluster}% __ONLY ONE__ keyword per line \keyword{hplot} psych/man/thurstone.Rd0000644000176200001440000000701013752056657014513 0ustar liggesusers\name{thurstone} \alias{thurstone} \title{Thurstone Case V scaling} \description{Thurstone Case V scaling allows for a scaling of objects compared to other objects. As one of the cases considered by Thurstone, Case V makes the assumption of equal variances and uncorrelated distributions. } \usage{ thurstone(x, ranks = FALSE, digits = 2) } \arguments{ \item{x}{ A square matrix or data frame of preferences, or a rectangular data frame or matrix rank order choices. } \item{ranks}{TRUE if rank orders are presented} \item{digits}{number of digits in the goodness of fit} } \details{Louis L. Thurstone was a pioneer in psychometric theory and measurement of attitudes, interests, and abilities. Among his many contributions was a systematic analysis of the process of comparative judgment (thurstone, 1927). He considered the case of asking subjects to successively compare pairs of objects. If the same subject does this repeatedly, or if subjects act as random replicates of each other, their judgments can be thought of as sampled from a normal distribution of underlying (latent) scale scores for each object, Thurstone proposed that the comparison between the value of two objects could be represented as representing the differences of the average value for each object compared to the standard deviation of the differences between objects. The basic model is that each item has a normal distribution of response strength and that choice represents the stronger of the two response strengths. A justification for the normality assumption is that each decision represents the sum of many independent inputs and thus, through the central limit theorem, is normally distributed. Thurstone considered five different sets of assumptions about the equality and independence of the variances for each item (Thurston, 1927). Torgerson expanded this analysis slightly by considering three classes of data collection (with individuals, between individuals and mixes of within and between) crossed with three sets of assumptions (equal covariance of decision process, equal correlations and small differences in variance, equal variances). The data may be either a square matrix of dataframe of preferences (as proportions with the probability of the column variable being chosen over the row variable) or a matrix or dataframe of rank orders ( 1 being prefered to 2, etc.) The second example creates 100 random permutations of ranks 1-5. These data are then converted to a matrix of choices and then scaled. The goodness of fit is practically perfect, even though the data are meaningless. This suggests a better goodness of fit test should be applied. } \value{ \item{GF }{Goodness of fit 1 = 1 - sum(squared residuals/squared original) for lower off diagonal. \cr Goodness of fit 2 = 1 - sum(squared residuals/squared original) for full matrix.} \item{residual }{square matrix of residuals (of class dist)} \item{choice}{The original choice data} ... } \references{ Thurstone, L. L. (1927) A law of comparative judgments. Psychological Review, 34, 273-286. Revelle, W. An introduction to psychometric theory with applications in R. (in preparation), Springer. \url{https://personality-project.org/r/book/} } \author{William Revelle} \examples{ data(psychTools::vegetables) thurstone(psychTools::veg) #But consider the case of 100 random orders set.seed((42)) ranks <- matrix(NA,nrow=100,ncol=5) for(i in 1:100) ranks[i,] <- sample(5,5) t <- thurstone(ranks,TRUE) t #show the fits t$hoice #show the choice matrix } \keyword{ models } psych/man/deprecated.Rd0000644000176200001440000000457614537105117014562 0ustar liggesusers\name{fa.poly} \alias{factor.pa} \alias{factor.minres} \alias{factor.wls} \alias{fa.poly} \title{Deprecated Exploratory Factor analysis functions. Please use fa} \description{After 6 years, it is time to stop using these deprecated functions! Please see \code{\link{fa}} which includes all of the functionality of these older functions. } \usage{ fa.poly(x,nfactors=1,n.obs = NA, n.iter=1, rotate="oblimin", SMC=TRUE, missing=FALSE, impute="median", min.err = .001, max.iter=50, symmetric=TRUE, warnings=TRUE, fm="minres",alpha=.1, p =.05,scores="regression", oblique.scores=TRUE, weight=NULL,global=TRUE,...) #deprecated factor.minres(r, nfactors=1, residuals = FALSE, rotate = "varimax",n.obs = NA, scores = FALSE,SMC=TRUE, missing=FALSE,impute="median",min.err = 0.001, digits = 2, max.iter = 50,symmetric=TRUE,warnings=TRUE,fm="minres") #deprecated factor.wls(r,nfactors=1,residuals=FALSE,rotate="varimax",n.obs = NA, scores=FALSE,SMC=TRUE,missing=FALSE,impute="median", min.err = .001, digits=2,max.iter=50,symmetric=TRUE,warnings=TRUE,fm="wls") #deprecated } \arguments{ \item{r}{deprecated.} \item{x}{deprecated} \item{nfactors}{ deprecated } \item{n.obs}{deprecated } \item{rotate}{deprecated} \item{n.iter}{deprecated} \item{residuals}{deprecated } \item{scores}{deprecated} \item{SMC}{deprecated } \item{missing}{deprecated} \item{impute}{deprecated} \item{max.iter}{deprecated} \item{symmetric}{deprecated} \item{warnings}{deprecated} \item{fm}{deprecated} \item{alpha}{deprecated} \item{p}{deprecated} \item{oblique.scores}{deprecated} \item{weight}{deprecated} \item{global}{deprecated} \item{digits}{deprecated} \item{min.err}{deprecated} \item{...}{deprecated} } \details{Please see the writeup for \code{\link{fa}} for all of the functionality in these older functions. } \value{ Pleases see the writeup for \code{\link{fa}} } \references{ Revelle, William. (in prep) An introduction to psychometric theory with applications in R. Springer. Working draft available at \url{https://personality-project.org/r/book/} } \author{ William Revelle } \note{These functions have been deprecated for 8 years. Don't use them. } \examples{ #none, you should see fa #using the Harman 24 mental tests, compare a principal factor with a principal components solution } \keyword{ multivariate } \keyword{ models}psych/man/cor.plot.Rd0000644000176200001440000002636214527736753014236 0ustar liggesusers\name{corPlot} \Rdversion{1.1} \alias{cor.plot} \alias{corPlot} \alias{corPlotUpperLowerCi} \alias{cor.plot.upperLowerCi} \title{Create an image plot for a correlation or factor matrix} \description{Correlation matrices may be shown graphically by using the image function to emphasize structure. This is a particularly useful tool for showing the structure of correlation matrices with a clear structure. Partially meant for the pedagogical value of the graphic for teaching or discussing factor analysis and other multivariate techniques. The sort option uses iclust to sort the matrix before plotting. } \usage{ corPlot(r,numbers=TRUE,colors=TRUE,n=51,main=NULL,zlim=c(-1,1), show.legend=TRUE, labels=NULL,n.legend=10,keep.par=TRUE,select=NULL, pval=NULL, digits=2, trailing=TRUE, cuts=c(.001,.01),scale=TRUE,cex,MAR,upper=TRUE,diag=TRUE, symmetric=TRUE,stars=FALSE, adjust="holm",xaxis=1, xlas=0, ylas=2,ysrt=0,xsrt=0, gr=NULL, alpha=.75, min.length=NULL,sort=FALSE,n.obs=NULL, ...) corPlotUpperLowerCi(R,numbers=TRUE,cuts=c(.001,.01,.05),select=NULL, main="Upper and lower confidence intervals of correlations",adjust=FALSE,...) cor.plot(r,numbers=TRUE,colors=TRUE,n=51,main=NULL,zlim=c(-1,1), show.legend=TRUE, labels=NULL,n.legend=10,keep.par=TRUE,select=NULL, pval=NULL, digits=2, trailing=TRUE, cuts=c(.001,.01),scale=TRUE,cex,MAR,upper=TRUE,diag=TRUE, symmetric=TRUE,stars=FALSE, adjust="holm",xaxis=1, xlas=0, ylas=2,ysrt=0,xsrt=0, gr=NULL, alpha=.75, min.length=NULL, sort=FALSE, n.obs=NULL,...) #deprecated cor.plot.upperLowerCi(R,numbers=TRUE,cuts=c(.001,.01,.05),select=NULL, main="Upper and lower confidence intervals of correlations",adjust=FALSE,...) #deprecated } \arguments{ \item{r}{A correlation matrix or the output of \code{\link{fa}}, \code{\link{principal}} or \code{\link{omega}}, or a raw data matrix. } \item{R}{The object returned from \code{\link{cor.ci}} } \item{numbers}{Display the numeric value of the correlations. (As of September, 2019) Defaults to TRUE.} \item{colors}{Defaults to TRUE and colors use colors from the colorRampPalette from red through white to blue, but colors=FALSE will use a grey scale} \item{n}{The number of levels of shading to use. Defaults to 51} \item{main}{A title. Defaults to "correlation plot"} \item{zlim}{The range of values to color -- defaults to -1 to 1. If specified as NULL, then defaults to min and max observed correlation.} \item{show.legend}{A legend (key) to the colors is shown on the right hand side} \item{labels}{if NULL, use column and row names, otherwise use labels} \item{n.legend}{How many categories should be labelled in the legend?} \item{sort}{If true, then sort the variables using the iclust algorithm} \item{keep.par}{restore the graphic parameters when exiting} \item{pval}{scale the numbers by their pvals, categorizing them based upon the values of cuts} \item{digits}{Round off to digits. Defaults to 2.} \item{trailing}{Show trailing zeros. } \item{cuts}{Scale the numbers by the categories defined by pval < cuts} \item{scale}{Should the size of the numbers be scaled by the significance level?} \item{select}{Select the subset of variables to plot} \item{cex}{Character size. Should be reduced a bit for large numbers of variables.} \item{MAR}{Allows for adjustment .of the margins if using really long labels or big fonts} \item{upper}{Should the upper off diagonal matrix be drawn, or left blank?} \item{diag}{Should we show the diagonal?} \item{symmetric}{By default, if given a non-symmetric matrix, we find the correlations using pair.wise complete and then show them. If wanting to display a non-symmetric matrix, then specify that symmetric is FALSE} \item{stars}{For those people who like to show the 'significance' of correlations by using magic astricks, set stars=TRUE} \item{n.obs}{If you want to show "stars" for symmetric input matrices (i.e. correlations), specify the number of observations} \item{adjust}{If showing significance, should we adjust for multiple tests? The default is to show zero order probabilities below the diagonal and adjust these using the 'holm' correction above the diagonal. Use adjust = "none" if no adjustment is desired. adjust is also used in corPlotUpperLowerCI to show the nominal alpha confidence intervals (adjust =FALSE) or the Bonferonni adjusted confidence intervals (adjust=TRUE).} \item{xlas}{Orientation of the x axis labels (1 = horizontal, 0, parallel to axis, 2 perpendicular to axis)} \item{ylas}{Orientation of the y axis labels (1 = horizontal, 0, parallel to axis, 2 perpendicular to axis)} \item{ysrt}{Rotation of y labels in degrees} \item{xsrt}{Rotation of x labels in degrees} \item{xaxis}{By default, draw this below the figure. If xaxis=3, then it wil be drawn above the figure} \item{gr}{A color gradient: e.g., gr <- colorRampPalette(c("#B52127", "white", "#2171B5")) will produce slightly more pleasing (to some) colors. See next to last example.} \item{alpha}{The degree of transparency (0 = completely, 1= not). Default value of .75 makes somewhat moreor pleasing plots when using numbers.} \item{min.length}{If not NULL, then the maximum number of characters to use in row/column labels} \item{...}{Other parameters for axis (e.g., cex.axis to change the font size, srt to rotate the numbers in the plot)} } \details{When summarizing the correlations of large data bases or when teaching about factor analysis or cluster analysis, it is useful to graphically display the structure of correlation matrices. This is a simple graphical display using the image function. The difference between mat.plot with a regular image plot is that the primary diagonal goes from the top left to the lower right. zlim defines how to treat the range of possible values. -1 to 1 and the color choice is more reasonable. Setting it as c(0,1) will lead to negative correlations treated as zero. This is advantageous when showing general factor structures, because it makes the 0 white. There is an interesting case when plotting correlations corrected for attenuation. Some of these might exceed 1. In this case, either set zlim = NULL (to use the observed maximum and minimum values) or all values above 1 will be given a slightly darker shade than 1, but do not differ. The default shows a legend for the color coding on the right hand side of the figure. Inspired, in part, by a paper by S. Dray (2008) on the number of components problem. Modified following suggestions by David Condon and Josh Wilt to use a more meaningful color choice ranging from dark red (-1) through white (0) to dark blue (1). Further modified to allow for color choices using the gr option (suggested by Lorien Elleman). Further modified to include the numerical value of the correlation. (Inspired by the corrplot package). These values may be scaled according the the probability values found in \code{\link{cor.ci}} or \code{\link{corTest}}. Unless specified, the font size is dynamically scaled to have a cex = 10/max(nrow(r),ncol(r). This can produce fairly small fonts for large problems. The font size of the labels may be adjusted using cex.axis which defaults to one. By default \code{\link{cor.ci}} calls corPlotUpperLowerCi and scales the correlations based upon "significance" values. The correlations plotted are the upper and lower confidence boundaries. To show the correlations themselves, call corPlot directly. If using the output of \code{\link{corTest}}, the upper off diagonal will be scaled by the corrected probability, the lower off diagonal the scaling is the uncorrected probabilities. If given raw data or correlation matrix, \code{\link{corPlotUpperLowerCi}} will automatically call \code{\link{corTest}} or \code{\link{cor.ci}}. If using the output of \code{\link{corTest}} or \code{\link{cor.ci}} as input to \code{\link{corPlotUpperLowerCi}}, the upper off diagonal will be the upper bounds and the lower off diagonal the lower bounds of the confidence intervals. If adjust=TRUE, these will use the Holm or Bonferroni adjusted values (depending upon corTest). To compare the elements of two correlation matrices, \code{\link{corPlot}} the results from \code{\link{lowerUpper}}. To do multiple \code{\link{corPlot}} on the same plot, specify that show.legend=FALSE and keep.par=FALSE. See the last examples. Care should be taken when selecting rows and columns from a non-symmetric matrix (e.g., the corrected correlations from \code{\link{scoreItems}} or \code{\link{scoreOverlap}}). To show a factor loading matrix (or any non-symmetric matrix), set symmetric=FALSE. Otherwise the input will be treated as raw data and correlations will be found. The sort option will sort the matrix using the output from \code{\link{iclust}}. To sort the matrix use another order, use \code{\link{mat.sort}} first. To find correlations other than Pearson, plot the output from e.g., \code{\link{mixed.cor}}. } \author{William Revelle } \references{ Dray, Stephane (2008) On the number of principal components: A test of dimensionality based on measurements of similarity between matrices. Computational Statistics & Data Analysis. 52, 4, 2228-2237. } \seealso{ \code{\link{fa}}, \code{\link{mat.sort}}, \code{\link{cor.ci}}, \code{\link{corTest}} \code{\link{lowerUpper}}. } \examples{ corPlot(Thurstone,main="9 cognitive variables from Thurstone") #just blue implies positive manifold #select just some variables to plot corPlot(Thurstone, zlim=c(0,1),main="9 cognitive variables from Thurstone",select=c(1:3,7:9)) #now show a non-symmetric plot corPlot(Thurstone[4:9,1:3], zlim=c(0,1),main="9 cognitive variables from Thurstone",numbers=TRUE,symmetric=FALSE) #Two ways of including stars to show significance #From the raw data corPlot(sat.act,numbers=TRUE,stars=TRUE) #from a correlation matrix with pvals cp <- corTest(sat.act) #find the correlations and pvals r<- cp$r p <- cp$p corPlot(r,numbers=TRUE,diag=FALSE,stars=TRUE, pval = p,main="Correlation plot with Holm corrected 'significance'") #now red means less than .5 corPlot(mat.sort(Thurstone),TRUE,zlim=c(0,1), main="9 cognitive variables from Thurstone (sorted by factor loading) ") simp <- sim.circ(24) corPlot(cor(simp),main="24 variables in a circumplex") #scale by raw and adjusted probabilities rs <- corTest(sat.act[1:200,] ) #find the probabilities of the correlations corPlot(r=rs$r,numbers=TRUE,pval=rs$p,main="Correlations scaled by probability values") #Show the upper and lower confidence intervals corPlotUpperLowerCi(R=rs,numbers=TRUE) #now do this again, but with lighter colors gr <- colorRampPalette(c("#B52127", "white", "#2171B5")) corPlot(r=rs$r,numbers=TRUE,pval=rs$p,main="Correlations scaled by probability values",gr=gr) corPlotUpperLowerCi(R=rs,numbers=TRUE,gr=gr) #do multiple plots #Also show the xaxis option op <- par(mfrow=c(2,2)) corPlot(psychTools::ability,show.legend=FALSE,keep.par=FALSE,upper=FALSE) f4 <- fa(psychTools::ability,4) corPlot(f4,show.legend=FALSE,keep.par=FALSE,numbers=TRUE,xlas=3) om <- omega(psychTools::ability,4) corPlot(om,show.legend=FALSE,keep.par=FALSE,numbers=TRUE,xaxis=3) par(op) corPlotUpperLowerCi(rs,adjust=TRUE,main="Holm adjusted confidence intervals",gr=gr) } \keyword{ multivariate }% at least one, from doc/KEYWORDS \keyword{ hplot }% __ONLY ONE__ keyword per line psych/man/statsBy.Rd0000644000176200001440000003133614546100007014076 0ustar liggesusers\name{statsBy} \alias{statsBy} \alias{statsBy.boot} \alias{statsBy.boot.summary} \alias{faBy} \title{Find statistics (including correlations) within and between groups for basic multilevel analyses} \description{When examining data at two levels (e.g., the individual and by some set of grouping variables), it is useful to find basic descriptive statistics (means, sds, ns per group, within group correlations) as well as between group statistics (over all descriptive statistics, and overall between group correlations). Of particular use is the ability to decompose a matrix of correlations at the individual level into correlations within group and correlations between groups. } \usage{ statsBy(data, group, cors = FALSE, cor="cor", method="pearson", use="pairwise", poly=FALSE, na.rm=TRUE,alpha=.05,minlength=5,weights=NULL, min.n=1) statsBy.boot(data,group,ntrials=10,cors=FALSE,replace=TRUE,method="pearson") statsBy.boot.summary(res.list,var="ICC2") faBy(stats, nfactors = 1, rotate = "oblimin", fm = "minres", free = TRUE, all=FALSE, min.n = 12,quant=.1, ...) } \arguments{ \item{data}{A matrix or dataframe with rows for subjects, columns for variables. One of these columns should be the values of a grouping variable.} \item{group}{The names or numbers of the variable in data to use as the grouping variables.} \item{cors}{Should the results include the correlation matrix within each group? Default is FALSE.} \item{cor}{Type of correlation/covariance to find within groups and between groups. The default is Pearson correlation. To find within and between covariances, set cor="cov". Although polychoric, tetrachoric, and mixed correlations can be found within groups, this does not make sense for the between groups or the pooled within groups. In this case, correlations for each group will be as specified, but the between groups and pooled within will be Pearson. See the discussion below.} \item{method}{What kind of correlations should be found (default is Pearson product moment)} \item{use}{How to treat missing data. use="pairwise" is the default} \item{poly}{Find polychoric.tetrachoric correlations within groups if requested.} \item{na.rm}{Should missing values be deleted (na.rm=TRUE) or should we assume the data are clean?} \item{alpha}{The alpha level for the confidence intervals for the ICC1 and ICC2, and rwg, rbg} \item{minlength}{The minimum length to use when abbreviating the labels for confidence intervals} \item{weights}{If specified, weight the groups by weights when finding the pooled within group correlation. Otherwise weight by sample size.} \item{ntrials}{The number of trials to run when bootstrapping statistics} \item{replace}{Should the bootstrap be done by permuting the data (replace=FALSE) or sampling with replacement (replace=TRUE)} \item{res.list}{The results from statsBy.boot may be summarized using boot.stats} \item{var}{Name of the variable to be summarized from statsBy.boot} \item{stats}{The output of statsBy} \item{nfactors}{The number of factors to extract in each subgroup} \item{rotate}{The factor rotation/transformation} \item{fm}{The factor method (see \code{\link{fa}} for details)} \item{free}{Allow the factor solution to be freely estimated for each individual (see note).} \item{all}{Report individual factor analyses for each group as well as the summary table} \item{min.n}{The minimum number of within subject cases before we factor analyze it or find group statistics. Only works for one grouping variable.} \item{quant}{Show the upper and lower quant quantile of the factor loadings in faBy} \item{...}{Other parameters to pass to the fa function} } \details{Multilevel data are endemic in psychological research. In multilevel data, observations are taken on subjects who are nested within some higher level grouping variable. The data might be experimental (participants are nested within experimental conditions) or observational (students are nested within classrooms, students are nested within college majors.) To analyze this type of data, one uses random effects models or mixed effect models, or more generally, multilevel models. There are at least two very powerful packages (nlme and multilevel) which allow for complex analysis of hierarchical (multilevel) data structures. \code{\link{statsBy}} is a much simpler function to give some of the basic descriptive statistics for two level models. It is meant to supplement true multilevel modeling. For a group variable (group) for a data.frame or matrix (data), basic descriptive statistics (mean, sd, n) as well as within group correlations (cors=TRUE) are found for each group. The amount of variance associated with the grouping variable compared to the total variance is the type 1 IntraClass Correlation (ICC1): \eqn{ICC1 = (MSb-MSw)/(MSb + MSw*(npr-1))} where npr is the average number of cases within each group. The reliability of the group differences may be found by the ICC2 which reflects how different the means are with respect to the within group variability. \eqn{ICC2 = (MSb-MSw)/MSb}. Because the mean square between is sensitive to sample size, this estimate will also reflect sample size. Perhaps the most useful part of \code{\link{statsBy}} is that it decomposes the observed correlations between variables into two parts: the within group and the between group correlation. This follows the decomposition of an observed correlation into the pooled correlation within groups (rwg) and the weighted correlation of the means between groups discussed by Pedazur (1997) and by Bliese in the multilevel package. \eqn{r_{xy} = eta_{x_{wg}} * eta_{y_{wg}} * r_{xy_{wg}} + eta_{x_{bg}} * eta_{y_{bg}} * r_{xy_{bg}} } where \eqn{r_{xy}} is the normal correlation which may be decomposed into a within group and between group correlations \eqn{r_{xy_{wg}}} and \eqn{r_{xy_{bg}}} and eta is the correlation of the data with the within group values, or the group means. It is important to realize that the within group and between group correlations are independent of each other. That is to say, inferring from the 'ecological correlation' (between groups) to the lower level (within group) correlation is inappropriate. However, these between group correlations are still very meaningful, if inferences are made at the higher level. There are actually two ways of finding the within group correlations pooled across groups. We can find the correlations within every group, weight these by the sample size and then report this pooled value (pooled). This is found if the cors option is set to TRUE. It is logically equivalent to doing a sample size weighted meta-analytic correlation. The other way, rwg, considers the covariances, variances, and thus correlations when each subject's scores are given as deviation score from the group mean. If finding tetrachoric, polychoric, or mixed correlations, these two estimates will differ, for the pooled value is the weighted polychoric correlation, but the rwg is the Pearson correlation. If the weights parameter is specified, the pooled correlations are found by weighting the groups by the specified weight, rather than sample size. Confidence values and significance of \eqn{r_{xy_{wg}}}, pwg, reflect the pooled number of cases within groups, while \eqn{r_{xy_{bg}} }, pbg, the number of groups. These are not corrected for multiple comparisons. \code{\link{withinBetween}} is an example data set of the mixture of within and between group correlations. \code{\link{sim.multilevel}} will generate simulated data with a multilevel structure. The \code{\link{statsBy.boot}} function will randomize the grouping variable ntrials times and find the statsBy output. This can take a long time and will produce a great deal of output. This output can then be summarized for relevant variables using the \code{\link{statsBy.boot.summary}} function specifying the variable of interest. These two functions are useful in order to find if the mere act of grouping leads to large between group correlations. Consider the case of the relationship between various tests of ability when the data are grouped by level of education (statsBy(sat.act,"education")) or when affect data are analyzed within and between an affect manipulation (statsBy(flat,group="Film") ). Note in this latter example, that because subjects were randomly assigned to Film condition for the pretest, that the pretest ICC1s cluster around 0. \code{\link{faBy}} uses the output of \code{\link{statsBy}} to perform a factor analysis on the correlation matrix within each group. If the free parameter is FALSE, then each solution is rotated towards the group solution (as much as possible). The output is a list of each factor solution, as well as a summary matrix of loadings and interfactor correlations for all groups. } \value{ \item{means}{The means for each group for each variable. } \item{sd}{The standard deviations for each group for each variable.} \item{n}{The number of cases for each group and for each variable.} \item{ICC1}{The intraclass correlation reflects the amount of total variance associated with the grouping variable.} \item{ICC2}{The intraclass correlation (2) reflecting how much the groups means differ.} \item{ci1}{The confidence intervals for the ICC1} \item{ci2}{The confidence intervals for the ICC2} \item{F}{The F from a one-way anova of group means.} \item{rwg}{The pooled within group correlations.} \item{ci.wg}{The confidence intervals of the pooled within group correlations.} \item{rbg}{The sample size weighted between group correlations. } \item{c.bg}{The confidence intervals of the rbg values} \item{etawg}{The correlation of the data with the within group values.} \item{etabg}{The correlation of the data with the group means.} \item{pbg}{The probability of the between group correlation} \item{pwg}{The probability of the within group correlation} \item{r}{In the case that we want the correlations in each group, r is a list of the within group correlations for every group. Set cors=TRUE} \item{within}{is just another way of displaying these correlations. within is a matrix which reports the lower off diagonal correlations as one row for each group.} \item{pooled}{The sample size weighted correlations. This is just within weighted by the sample sizes. The cors option must be set to TRUE to get this. See the note. } \item{slope}{The within group slopes -- if using cor="cov" these will be the b weights} } \references{ Pedhazur, E.J. (1997) Multiple regression in behavioral research: explanation and prediction. Harcourt Brace. } \author{William Revelle } \note{If finding polychoric correlations, the two estimates of the pooled within group correlations will differ, for the pooled value is the weighted polychoric correlation, but the rwg is the Pearson correlation. As of March, 2020 the average is based upon the weighted FisherZ correlation (back transformed) rather than the average correlation. The value of rbg (the between group correlation) is the group size weighted correlation. This is not the same as just finding the correlation of the group means (i.e. cor(means)). The statsBy.boot function will sometimes fail if sampling with replacement because if the group sizes differ drastically, some groups will be empty. In this case, sample without replacement. The statsBy.boot function can take a long time. (As I am writing this, I am running 1000 replications of a problem with 64,000 cases and 84 groups. It is taking about 3 seconds per replication on a MacBook Pro.) The \code{\link{faBy}} function takes the output of statsBy (with the cors=TRUE option) and then factors each individual subject. By default, the solutions are organized so that the factors "match" the group solution in terms of their order. It is also possible to attempt to force the solutions to match by order and also by using the TargetQ rotation function. (free=FALSE) } \seealso{\code{\link{describeBy}} and the functions within the multilevel package. } \examples{ #Taken from Pedhazur, 1997 pedhazur <- structure(list(Group = c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L), X = c(5L, 2L, 4L, 6L, 3L, 8L, 5L, 7L, 9L, 6L), Y = 1:10), .Names = c("Group", "X", "Y"), class = "data.frame", row.names = c(NA, -10L)) pedhazur ped.stats <- statsBy(pedhazur,"Group") ped.stats #Now do this for the sat.act data set sat.stats <- statsBy(sat.act,c("education","gender"),cors=TRUE) #group by two grouping variables print(sat.stats,short=FALSE) lowerMat(sat.stats$pbg) #get the probability values #show means by groups round(sat.stats$mean) #Do separate factor analyses for each group sb.bfi <- statsBy(psychTools::bfi[1:10], group=psychTools::bfi$gender,cors=TRUE) faBy(sb.bfi,1) #one factor per group faBy(sb.bfi,2) #two factors per group } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{multivariate } \keyword{ models } psych/man/anova.psych.Rd0000644000176200001440000000467614536634356014727 0ustar liggesusers\name{anova.psych} \alias{anova.psych} \title{Model comparison for regression, mediation, and factor analysis} \description{ When doing regressions from the data or from a correlation matrix using \code{\link{setCor}} or doing a mediation analysis using \code{link{mediate}}, it is useful to compare alternative models. Since these are both regression models, the appropriate test is an Analysis of Variance. Similar tests, using Chi Square may be done for factor analytic models. } \usage{ \method{anova}{psych}(object,...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{object}{An object from \code{\link{setCor}}, \code{\link{mediate}}, \code{\link{omega}}, or \code{\link{fa}}. } \item{\dots}{More objects of the same type may be supplied here} } \details{ \code{\link{setCor}} returns the SE.residual and degrees of freedom. These are converted to SSR and then an analysis of variance is used to compare two (or more) models. For \code{\link{omega}} or \code{\link{fa}} the change in the ML chisquare statistic as a function of change in df is reported. } \value{An ANOVA table comparing the models.} \author{ Wiliam Revelle } \note{ The code has been adapted from the anova.lm function in stats and the anova.sem by John Fox. } \seealso{\code{\link{setCor}}, \code{\link{mediate}}, \code{\link{omega}}, \code{\link{fa}} } \examples{ if(require("psychTools")) { m1 <- lmCor(reaction ~ import, data = Tal_Or,std=FALSE) m2 <- lmCor(reaction ~ import+pmi, data = Tal_Or,std=FALSE) m3 <- lmCor(reaction ~ import+pmi + cond, data = Tal_Or,std=FALSE) anova(m1,m2,m3) } #Several interesting test cases are taken from analyses of the Spengler data set #Although the sample sizes are actually very large in the first wave, I use the #sample sizes from the last wave #This data set is actually in psychTools but is copied here until we can update psychTools #We set the n.iter to be 50 instead of the default value of 5,000 if(require("psychTools")) { mod1 <- mediate(Income.50 ~ IQ + Parental+ (Ed.11) ,data=Spengler, n.obs = 1952, n.iter=50) mod2 <- mediate(Income.50 ~ IQ + Parental+ (Ed.11) + (Income.11) ,data=Spengler,n.obs = 1952, n.iter=50) #Now, compare these models anova(mod1,mod2) } f3 <- fa(Thurstone,3,n.obs=213) #we need to specifiy the n.obs for the test to work f2 <- fa(Thurstone,2, n.obs=213) anova(f2,f3) } \keyword{ models }% at least one, from doc/KEYWORDS \keyword{multivariate }% __ONLY ONE__ keyword per line psych/man/congruence.Rd0000644000176200001440000000754414150022207014575 0ustar liggesusers \name{congruence} \alias{congruence} \alias{cohen.profile} \alias{distance} \title{Matrix and profile congruences and distances} \description{The congruence coefficient two matrices is just the cross product of their respective values divided by the square root of their sums of squares. If the columns are zero centered, this is just the correlation. If the columns are centered around the scale neutral point, this is Cohen's profile correlation. A set of distances (city block, euclidean, Minkowski) may be found by the distance function. } \usage{ congruence(x,y=NULL) cohen.profile(x,y=NULL ,M=NULL) distance(x,y=NULL,r=2) } \arguments{ \item{x}{ A matrix of factor loadings or a list of matrices of factor loadings} \item{y}{ A second matrix of factor loadings (if x is a list, then y may be empty)} \item{M}{The midpoint of the items which should be used for reflection. NULL if to be found from the data. } \item{r}{The exponent for the generalized distance. (r=1 is city block, r=2 is euclidian, r=100 or larger emphasize the largest distance )} } \details{ Congruences are the cosines of pairs of vectors defined by a matrix and based at the origin. Thus, for values that differ only by a scaler the congruence will be 1. For two matrices, F1 and F2, the measure of congruence, phi, is \deqn{ \phi = \frac{\sum F_1 F_2}{\sqrt{\sum(F_1^2)\sum(F_2^2)}} .}{phi = sum(F1 F2)/sqrt(sum(F1^2) sum(F2^2)) } It is an interesting exercise to compare congruences with the correlations of the respective values. Congruences are based upon the raw cross products, while correlations are based upon centered cross products. That is, correlations of items are cosines of the vectors based at the mean loading for each column. \deqn{ \phi = \frac{\sum (F_1-a) (F_2 - b)}{\sqrt{\sum((F_1-a)^2)\sum((F_2-b)^2)}} .}{phi = sum((F1-a)(F2-b))/sqrt(sum((F1-a)^2) sum((F2-b)^2)) }. For congruence coefficients, a = b= 0. For correlations a=mean F1, b= mean F2. Input may either be one or two matrices. For congruences of factor or component loading matrices, use \code{\link{factor.congruence}}. Normally, all factor loading matrices should be complete (have no missing loadings). In the case where some loadings are missing, if the use option is specified, then variables with missing loadings are dropped. If the data are zero centered, this is the correlation, if the data are centered around the scale midpoint (M), this is Cohen's Similarity coefficient. See examples. If M is not specified, it is found as the midpoint of the items in x and y. \code{\link{cohen.profile}} applies the \code{\link{congruence}} function to data centered around M. M may be specified, or found from the data. The last example is taken from Cohen (1969). \code{\link{distance}} finds the generalized distance as a function of r. City block (r=1), Euclidean (r=2) or weighted towards maximimum (r >2). } \value{A matrix of congruences or distances. } \references{ Burt, Cyril (1948) Methods of factor-analysis with and without successive approximation. British Journal of Educational Psychology, 7 (2) 172-195. Burt, Cyril (1948) The factorial study of temperamental traits. British Journal of Statistical Psychology, 1(3) 178-203. Cohen, Jacob (1969), rc: A profile similarity coefficient invariant over variable reflection. Psychological Bulletin, 71 (4) 281-284. } \author{William Revelle } \seealso{\code{\link{factor.congruence}}. } \examples{#cohen's example # a and b have reversed one item around the midpoint co <- data.frame(ira=c(2,6,5,6,4), jim=c(1,3,5,4,4), a=c(5,6,5,6,4),b=c(6,3,5,4,4)) lowerMat(congruence(co-3.5)) # congruence around the midpoint is insensitive to reflection lowerCor(co) #the correlation is not lowerMat(congruence(scale(co,scale=FALSE))) #zero centered congruence is r cohen.profile(co) } \keyword{ multivariate } \keyword{ models } % Use only one concept per line. psych/man/fa.Rd0000644000176200001440000013134014537105604013037 0ustar liggesusers\name{fa} \alias{fa} \alias{fac} \alias{fa.sapa} \alias{fa.pooled} \title{Exploratory Factor analysis using MinRes (minimum residual) as well as EFA by Principal Axis, Weighted Least Squares or Maximum Likelihood } \description{Among the many ways to do latent variable exploratory factor analysis (EFA), one of the better is to use Ordinary Least Squares (OLS) to find the minimum residual (minres) solution. This produces solutions very similar to maximum likelihood even for badly behaved matrices. A variation on minres is to do weighted least squares (WLS). Perhaps the most conventional technique is principal axes (PAF). An eigen value decomposition of a correlation matrix is done and then the communalities for each variable are estimated by the first n factors. These communalities are entered onto the diagonal and the procedure is repeated until the sum(diag(r)) does not vary. Yet another estimate procedure is maximum likelihood. For well behaved matrices, maximum likelihood factor analysis (either in the fa or in the factanal function) is probably preferred. Bootstrapped confidence intervals of the loadings and interfactor correlations are found by fa with n.iter > 1. } \usage{ fa(r, nfactors=1, n.obs = NA, n.iter=1, rotate="oblimin", scores="regression", residuals=FALSE, SMC=TRUE, covar=FALSE, missing=FALSE, impute="none", min.err = 0.001, max.iter = 50, symmetric=TRUE, warnings=TRUE, fm="minres", alpha=.1, p=.05, oblique.scores=FALSE, np.obs=NULL, use="pairwise", cor="cor", correct=.5, weight=NULL, n.rotations=1, hyper=.15, smooth=TRUE,...) fac(r,nfactors=1,n.obs = NA, rotate="oblimin", scores="tenBerge", residuals=FALSE, SMC=TRUE, covar=FALSE, missing=FALSE, impute="median", min.err = 0.001, max.iter=50, symmetric=TRUE, warnings=TRUE, fm="minres", alpha=.1, oblique.scores=FALSE, np.obs=NULL, use="pairwise", cor="cor", correct=.5, weight=NULL, n.rotations=1, hyper=.15, smooth=TRUE,...) fa.pooled(datasets,nfactors=1,rotate="oblimin", scores="regression", residuals=FALSE, SMC=TRUE, covar=FALSE, missing=FALSE,impute="median", min.err = .001, max.iter=50,symmetric=TRUE, warnings=TRUE, fm="minres", alpha=.1, p =.05, oblique.scores=FALSE,np.obs=NULL, use="pairwise", cor="cor", correct=.5, weight=NULL,...) fa.sapa(r,nfactors=1, n.obs = NA,n.iter=1,rotate="oblimin",scores="regression", residuals=FALSE, SMC=TRUE, covar=FALSE, missing=FALSE, impute="median", min.err = .001, max.iter=50,symmetric=TRUE,warnings=TRUE, fm="minres", alpha=.1, p =.05, oblique.scores=FALSE, np.obs=NULL, use="pairwise", cor="cor", correct=.5, weight=NULL, frac=.1,...) } \arguments{ \item{r}{A correlation or covariance matrix or a raw data matrix. If raw data, the correlation matrix will be found using pairwise deletion. If covariances are supplied, they will be converted to correlations unless the covar option is TRUE.} \item{nfactors}{ Number of factors to extract, default is 1 } \item{n.obs}{Number of observations used to find the correlation matrix if using a correlation matrix. Used for finding the goodness of fit statistics. Must be specified if using a correlaton matrix and finding confidence intervals.} \item{np.obs}{The pairwise number of observations. Used if using a correlation matrix and asking for a minchi solution.} \item{rotate}{"none", "varimax", "quartimax", "bentlerT", "equamax", "varimin", "geominT" and "bifactor" are orthogonal rotations. "Promax", "promax", "oblimin", "simplimax", "bentlerQ, "geominQ" and "biquartimin" and "cluster" are possible oblique transformations of the solution. The default is to do a oblimin transformation, although versions prior to 2009 defaulted to varimax. SPSS seems to do a Kaiser normalization before doing Promax, this is done here by the call to "promax" which does the normalization before calling Promax in GPArotation.} \item{n.rotations}{Number of random starts for the rotations.} \item{n.iter}{Number of bootstrap interations to do in fa or fa.poly} \item{residuals}{Should the residual matrix be shown } \item{scores}{the default="regression" finds factor scores using regression. Alternatives for estimating factor scores include simple regression ("Thurstone"), correlaton preserving ("tenBerge") as well as "Anderson" and "Bartlett" using the appropriate algorithms ( \code{\link{factor.scores}}). Although scores="tenBerge" is probably preferred for most solutions, it will lead to problems with some improper correlation matrices. } \item{SMC}{Use squared multiple correlations (SMC=TRUE) or use 1 as initial communality estimate. Try using 1 if imaginary eigen values are reported. If SMC is a vector of length the number of variables, then these values are used as starting values in the case of fm='pa'. } \item{covar}{if covar is TRUE, factor the covariance matrix, otherwise factor the correlation matrix} \item{missing}{if scores are TRUE, and missing=TRUE, then impute missing values using either the median or the mean. Specifying impute="none" will not impute data points and thus will have some missing data in the factor scores.} \item{impute}{"median" or "mean" values are used to replace missing values} \item{min.err}{Iterate until the change in communalities is less than min.err} \item{max.iter}{Maximum number of iterations for convergence } \item{symmetric}{symmetric=TRUE forces symmetry by just looking at the lower off diagonal values} \item{hyper}{Count number of (absolute) loadings less than hyper. } \item{warnings}{warnings=TRUE => warn if number of factors is too many } \item{fm}{Factoring method fm="minres" will do a minimum residual as will fm="uls". Both of these use a first derivative. fm="ols" differs very slightly from "minres" in that it minimizes the entire residual matrix using an OLS procedure but uses the empirical first derivative. This will be slower. fm="wls" will do a weighted least squares (WLS) solution, fm="gls" does a generalized weighted least squares (GLS), fm="pa" will do the principal factor solution, fm="ml" will do a maximum likelihood factor analysis. fm="minchi" will minimize the sample size weighted chi square when treating pairwise correlations with different number of subjects per pair. fm ="minrank" will do a minimum rank factor analysis. "old.min" will do minimal residual the way it was done prior to April, 2017 (see discussion below). fm="alpha" will do alpha factor analysis as described in Kaiser and Coffey (1965)} \item{alpha}{alpha level for the confidence intervals for RMSEA} \item{p}{if doing iterations to find confidence intervals, what probability values should be found for the confidence intervals} \item{oblique.scores}{When factor scores are found, should they be based on the structure matrix (default) or the pattern matrix (oblique.scores=TRUE). Now it is always false. If you want oblique factor scores, use tenBerge. } \item{weight}{If not NULL, a vector of length n.obs that contains weights for each observation. The NULL case is equivalent to all cases being weighted 1.} \item{use}{How to treat missing data, use="pairwise" is the default". See cor for other options.} \item{cor}{How to find the correlations: "cor" is Pearson", "cov" is covariance, "tet" is tetrachoric, "poly" is polychoric, "mixed" uses mixed cor for a mixture of tetrachorics, polychorics, Pearsons, biserials, and polyserials, Yuleb is Yulebonett, Yuleq and YuleY are the obvious Yule coefficients as appropriate} \item{correct}{When doing tetrachoric, polycoric, or mixed cor, how should we treat empty cells. (See the discussion in the help for tetrachoric.)} \item{frac}{The fraction of data to sample n.iter times if showing stability across sample sizes} \item{datasets}{A list of replication data sets to analyse in fa.pooled. All need to have the same number of variables.} \item{smooth}{By default, improper correlations matrices are smoothed. This is not necessary for factor extraction using fm="pa" or fm="minres", but is needed to give some of the goodness of fit tests. (see notes)} \item{...}{additional parameters, specifically, keys may be passed if using the target rotation, or delta if using geominQ, or whether to normalize if using Varimax} } \details{Factor analysis is an attempt to approximate a correlation or covariance matrix with one of lesser rank. The basic model is that \eqn{_nR_n \approx _{n}F_{kk}F_n'+ U^2}{nRn = nFk kFn' + U2} where k is much less than n. There are many ways to do factor analysis, and maximum likelihood procedures are probably the most commonly preferred (see \code{\link{factanal}} ). The existence of uniquenesses is what distinguishes factor analysis from principal components analysis (e.g., \code{\link{principal}}). If variables are thought to represent a ``true" or latent part then factor analysis provides an estimate of the correlations with the latent factor(s) representing the data. If variables are thought to be measured without error, then principal components provides the most parsimonious description of the data. Factor loadings will be smaller than component loadings for the later reflect unique error in each variable. The off diagonal residuals for a factor solution will be superior (smaller) than that of a component model. Factor loadings can be thought of as the asymptotic component loadings as the number of variables loading on each factor increases. The fa function will do factor analyses using one of six different algorithms: minimum residual (minres, aka ols, uls), principal axes, alpha factoring, weighted least squares, minimum rank, or maximum likelihood. Principal axes factor analysis has a long history in exploratory analysis and is a straightforward procedure. Successive eigen value decompositions are done on a correlation matrix with the diagonal replaced with diag (FF') until \eqn{\sum(diag(FF'))} does not change (very much). The current limit of max.iter =50 seems to work for most problems, but the Holzinger-Harmon 24 variable problem needs about 203 iterations to converge for a 5 factor solution. Not all factor programs that do principal axes do iterative solutions. The example from the SAS manual (Chapter 33) is such a case. To achieve that solution, it is necessary to specify that the max.iter = 1. Comparing that solution to an iterated one (the default) shows that iterations improve the solution. In addition, fm="mle" produces even better solutions for this example. Both the RMSEA and the root mean square of the residuals are smaller than the fm="pa" solution. However, simulations of multiple problem sets suggest that fm="pa" tends to produce slightly smaller residuals while having slightly larger RMSEAs than does fm="minres" or fm="mle". That is, the sum of squared residuals for fm="pa" seem to be slightly smaller than those found using fm="minres" but the RMSEAs are slightly worse when using fm="pa". That is to say, the "true" minimal residual is probably found by fm="pa". Following extensive correspondence with Hao Wu and Mikko Ronkko, in April, 2017 the derivative of the minres and uls) fitting was modified. This leads to slightly smaller residuals (appropriately enough for a method claiming to minimize them) than the prior procedure. For consistency with prior analyses, "old.min" was added to give these slightly larger residuals. The differences between old.min and the newer "minres" and "ols" solutions are at the third to fourth decimal, but none the less, are worth noting. For comparison purposes, the fm="ols" uses empirical first derivatives, while uls and minres use equation based first derivatives. The results seem to be identical, but the minres and uls solutions require fewer iterations for larger problems and are faster. Thanks to Hao Wu for some very thoughtful help. Although usually these various algorithms produce equivalent results, there are several data sets included that show large differences between the methods. \code{\link[psychTools]{Schutz}} produces Heywood and super Heywood cases, \code{\link[psychTools]{blant}} leads to very different solutions. In particular, the minres solution produces smaller residuals than does the mle solution, and the factor.congruence coefficients show very different solutions. A very strong argument against using MLE is found in the chapter by MacCallum, Brown and Cai (2007) who show that OLS approaches produce equivalent solutions most of the time, and better solutions some of the time. This particularly in the case of models with some unmodeled small factors. (See \code{\link{sim.minor}} to generate such data.) Principal axes may be used in cases when maximum likelihood solutions fail to converge, although fm="minres" will also do that and tends to produce better (smaller RMSEA) solutions. The fm="minchi" option is a variation on the "minres" (ols) solution and minimizes the sample size weighted residuals rather than just the residuals. This was developed to handle the problem of data that Massively Missing Completely at Random (MMCAR) which a condition that happens in the SAPA project. A traditional problem in factor analysis was to find the best estimate of the original communalities in order to speed up convergence. Using the Squared Multiple Correlation (SMC) for each variable will underestimate the original communalities, using 1s will over estimate. By default, the SMC estimate is used. Note that in the case of non-invertible matrices, the pseudo-inverse is found so smcs are still estimated. In either case, iterative techniques will tend to converge on a stable solution. If, however, a solution fails to be achieved, it is useful to try again using ones (SMC =FALSE). Alternatively, a vector of starting values for the communalities may be specified by the SMC option. The iterated principal axes algorithm does not attempt to find the best (as defined by a maximum likelihood criterion) solution, but rather one that converges rapidly using successive eigen value decompositions. The maximum likelihood criterion of fit and the associated chi square value are reported, and will be (slightly) worse than that found using maximum likelihood procedures. The minimum residual (minres) solution is an unweighted least squares solution that takes a slightly different approach. It uses the \code{\link{optim}} function and adjusts the diagonal elements of the correlation matrix to mimimize the squared residual when the factor model is the eigen value decomposition of the reduced matrix. MINRES and PA will both work when ML will not, for they can be used when the matrix is singular. Although before the change in the derivative, the MINRES solution was slightly more similar to the ML solution than is the PA solution. With the change in the derivative of the minres fit, the minres, pa and uls solutions are practically identical. To a great extent, the minres and wls solutions follow ideas in the \code{\link{factanal}} function with the change in the derivative. The weighted least squares (wls) solution weights the residual matrix by 1/ diagonal of the inverse of the correlation matrix. This has the effect of weighting items with low communalities more than those with high communalities. The generalized least squares (gls) solution weights the residual matrix by the inverse of the correlation matrix. This has the effect of weighting those variables with low communalities even more than those with high communalities. The maximum likelihood solution takes yet another approach and finds those communality values that minimize the chi square goodness of fit test. The fm="ml" option provides a maximum likelihood solution following the procedures used in \code{\link{factanal}} but does not provide all the extra features of that function. It does, however, produce more expansive output. The minimum rank factor model (MRFA) roughly follows ideas by Shapiro and Ten Berge (2002) and Ten Berge and Kiers (1991). It makes use of the glb.algebraic procedure contributed by Andreas Moltner. MRFA attempts to extract factors such that the residual matrix is still positive semi-definite. This version is still being tested and feedback is most welcome. Alpha factor analysis finds solutions based upon a correlation matrix corrected for communalities and then rescales these to the original correlation matrix. This procedure is described by Kaiser and Coffey, 1965. Test cases comparing the output to SPSS suggest that the PA algorithm matches what SPSS calls uls, and that the wls solutions are equivalent in their fits. The wls and gls solutions have slightly larger eigen values, but slightly worse fits of the off diagonal residuals than do the minres or maximum likelihood solutions. Comparing the results to the examples in Harman (1976), the PA solution with no iterations matches what Harman calls Principal Axes (as does SAS), while the iterated PA solution matches his minres solution. The minres solution found in psych tends to have slightly smaller off diagonal residuals (as it should) than does the iterated PA solution. Although for items, it is typical to find factor scores by scoring the salient items (using, e.g., \code{\link{scoreItems}}) factor scores can be estimated by regression as well as several other means. There are multiple approaches that are possible (see Grice, 2001) and one taken here was developed by tenBerge et al. (see \code{\link{factor.scores}}). The alternative, which will match factanal is to find the scores using regression -- Thurstone's least squares regression where the weights are found by \eqn{W = R^{-1}S}{W = inverse(R)S} where R is the correlation matrix of the variables ans S is the structure matrix. Then, factor scores are just \eqn{Fs = X W}{Fs = X W}. In the oblique case, the factor loadings are referred to as Pattern coefficients and are related to the Structure coefficients by \eqn{S = P \Phi}{S = P Phi} and thus \eqn{P = S \Phi^{-1}}{P = S Phi^-1}. When estimating factor scores, \code{\link{fa}} and \code{\link{factanal}} differ in that \code{\link{fa}} finds the factors from the Structure matrix while \code{\link{factanal}} seems to do it from the Pattern matrix. Thus, although in the orthogonal case, fa and factanal agree perfectly in their factor score estimates, they do not agree in the case of oblique factors. Setting oblique.scores = TRUE will produce factor score estimate that match those of \code{\link{factanal}}. It is sometimes useful to extend the factor solution to variables that were not factored. This may be done using \code{\link{fa.extension}}. Factor extension is typically done in the case where some variables were not appropriate to factor, but factor loadings on the original factors are still desired. Factor extension is a very powerful procedure in that it allows one to find the factor-criterion correlations without using factor scores. For dichotomous items or polytomous items, it is recommended to analyze the \code{\link{tetrachoric}} or \code{\link{polychoric}} correlations rather than the Pearson correlations. This may be done by specifying cor="poly" or cor="tet" or cor="mixed" if the data have a mixture of dichotomous, polytomous, and continous variables. Analysis of dichotomous or polytomous data may also be done by using \code{\link{irt.fa}} or simply setting the cor="poly" option. In the first case, the factor analysis results are reported in Item Response Theory (IRT) terms, although the original factor solution is returned in the results. In the later case, a typical factor loadings matrix is returned, but the tetrachoric/polychoric correlation matrix and item statistics are saved for reanalysis by \code{\link{irt.fa}}. (See also the \code{\link{mixed.cor}} function to find correlations from a mixture of continuous, dichotomous, and polytomous items.) Of the various rotation/transformation options, varimax, Varimax, quartimax, bentlerT, geominT, and bifactor do orthogonal rotations. Promax transforms obliquely with a target matix equal to the varimax solution. oblimin, quartimin, simplimax, bentlerQ, geominQ and biquartimin are oblique transformations. Most of these are just calls to the GPArotation package. The ``cluster'' option does a targeted rotation to a structure defined by the cluster representation of a varimax solution. With the optional "keys" parameter, the "target" option will rotate to a target supplied as a keys matrix. (See \code{\link{target.rot}}.) oblimin is implemented in GPArotation by a call to quartimin with delta=0. This leads to confusion when people refer to quartimin solutions. It is important to note a dirty little secret about factor rotation. That is the problem of local minima. Multiple restarts of the rotation algorithms are strongly encouraged (see Nguyen and Waller, N. G. 2021). Two additional target rotation options are available through calls to GPArotation. These are the targetQ (oblique) and targetT (orthogonal) target rotations of Michael Browne. See \code{\link{target.rot}} for more documentation. The "bifactor" rotation implements the Jennrich and Bentler (2011) bifactor rotation by calling the GPForth function in the GPArotation package and using two functions adapted from the MatLab code of Jennrich and Bentler. This seems to have a problem with local minima and multiple starting values should be used. There are two varimax rotation functions. One, Varimax, in the GPArotation package does not by default apply Kaiser normalization. The other, varimax, in the stats package, does. It appears that the two rotation functions produce slightly different results even when normalization is set. For consistency with the other rotation functions, Varimax is probably preferred. The rotation matrix (rot.mat) is returned from all of these options. This is the inverse of the Th (theta?) object returned by the GPArotation package. The correlations of the factors may be found by \eqn{\Phi = \theta' \theta}{Phi = Th' Th} There are two ways to handle dichotomous or polytomous responses: \code{\link{fa}} with the cor="poly" option which will return the tetrachoric or polychoric correlation matrix, as well as the normal factor analysis output, and \code{\link{irt.fa}} which returns a two parameter irt analysis as well as the normal fa output. When factor analyzing items with dichotomous or polytomous responses, the \code{\link{irt.fa}} function provides an Item Response Theory representation of the factor output. The factor analysis results are available, however, as an object in the irt.fa output. \code{\link{fa.poly}} is deprecated, for its functioning is matched by setting cor="poly". It will produce normal factor analysis output but also will save the polychoric matrix (rho) and items difficulties (tau) for subsequent irt analyses. \code{\link{fa.poly}} will, by default, find factor scores if the data are available. The correlations are found using either \code{\link{tetrachoric}} or \code{\link{polychoric}} and then this matrix is factored. Weights from the factors are then applied to the original data to estimate factor scores. The function \code{\link{fa}} will repeat the analysis n.iter times on a bootstrapped sample of the data (if they exist) or of a simulated data set based upon the observed correlation matrix. The mean estimate and standard deviation of the estimate are returned and will print the original factor analysis as well as the alpha level confidence intervals for the estimated coefficients. The bootstrapped solutions are rotated towards the original solution using target.rot. The factor loadings are z-transformed, averaged and then back transformed. This leads to an error in the case of Heywood cases. The probably better alternative is to just find the mean bootstrapped value and find the confidence intervals based upon the observed range of values. The default is to have n.iter =1 and thus not do bootstrapping. If using polytomous or dichotomous items, it is perhaps more useful to find the Item Response Theory parameters equivalent to the factor loadings reported in fa.poly by using the \code{\link{irt.fa}} function. For those who like SPSS type output, the measure of factoring adequacy known as the Kaiser-Meyer-Olkin \code{\link{KMO}} test may be found from the correlation matrix or data matrix using the \code{\link{KMO}} function. Similarly, the Bartlett's test of Sphericity may be found using the \code{\link{cortest.bartlett}} function. For those who want to have an object of the variances accounted for, this is returned invisibly by the print function. (e.g., p <- print(fa(ability))$Vaccounted ). This is now returned by the fa function as well (e.g. p <- fa(ability)$Vaccounted ). Just as communalities may be found by the diagonal of Pattern \%*\% t(Structure) so can the variance accounted for be found by diagonal ( t(Structure) \%*\% Pattern. Note that referred to as SS loadings. The output from the print.psych.fa function displays the factor loadings (from the pattern matrix, the h2 (communalities) the u2 (the uniquenesses), com (the complexity of the factor loadings for that variable (see below). In the case of an orthogonal solution, h2 is merely the row sum of the squared factor loadings. But for an oblique solution, it is the row sum of the orthogonal factor loadings (remember, that rotations or transformations do not change the communality). In response to a request from Asghar Minaei who wanted to combine several imputation data sets from mice, fa.pooled was added. The separate data sets are combined into a list (datasets) which are then factored separately. Each solution is rotated towards the first set in the list. The results are then reported in terms of pooled loadings and confidence intervals based upon the replications. \code{\link{fa.sapa}} simulates the process of doing SAPA (Synthetic Aperture Personality Assessment). It will do iterative solutions for successive random samples of fractions (frac) of the data set. This allows us to find the stability of solutions for various sample sizes and various sample rates. Need to specify the number of iterations (n.iter) as well as the percent of data sampled (frac). } \value{ \item{values }{Eigen values of the common factor solution} \item{e.values}{Eigen values of the original matrix} \item{communality}{Communality estimates for each item. These are merely the sum of squared factor loadings for that item.} \item{communalities}{If using minrank factor analysis, these are the communalities reflecting the total amount of common variance. They will exceed the communality (above) which is the model estimated common variance. } \item{rotation}{which rotation was requested?} \item{n.obs}{number of observations specified or found} \item{loadings}{An item by factor (pattern) loading matrix of class ``loadings" Suitable for use in other programs (e.g., GPA rotation or factor2cluster. To show these by sorted order, use \code{\link{print.psych}} with sort=TRUE} \item{complexity}{Hoffman's index of complexity for each item. This is just \eqn{\frac{(\Sigma a_i^2)^2}{\Sigma a_i^4}}{{(\Sigma a_i^2)^2}/{\Sigma a_i^4}} where a_i is the factor loading on the ith factor. From Hofmann (1978), MBR. See also Pettersson and Turkheimer (2010).} \item{Structure}{An item by factor structure matrix of class ``loadings". This is just the loadings (pattern) matrix times the factor intercorrelation matrix.} \item{fit}{How well does the factor model reproduce the correlation matrix. This is just \eqn{\frac{\Sigma r_{ij}^2 - \Sigma r^{*2}_{ij} }{\Sigma r_{ij}^2} }{(sum(r^2ij - sum(r*^2ij))/sum(r^2ij} (See \code{\link{VSS}}, \code{\link{ICLUST}}, and \code{\link{principal}} for this fit statistic.} \item{fit.off}{how well are the off diagonal elements reproduced?} \item{dof}{Degrees of Freedom for this model. This is the number of observed correlations minus the number of independent parameters. Let n=Number of items, nf = number of factors then \cr \eqn{dof = n * (n-1)/2 - n * nf + nf*(nf-1)/2}{dof = n * (n-1)/2 - n * nf + nf*(nf-1)/2}} \item{objective}{Value of the function that is minimized by a maximum likelihood procedures. This is reported for comparison purposes and as a way to estimate chi square goodness of fit. The objective function is \cr \eqn{f = log(trace ((FF'+U2)^{-1} R) - log(|(FF'+U2)^{-1} R|) - n.items}{log(trace ((FF'+U2)^{-1} R) - log(|(FF'+U2)^-1 R|) - n.items}. When using MLE, this function is minimized. When using OLS (minres), although we are not minimizing this function directly, we can still calculate it in order to compare the solution to a MLE fit. } \item{STATISTIC}{If the number of observations is specified or found, this is a chi square based upon the objective function, f (see above). Using the formula from \code{\link{factanal}}(which seems to be Bartlett's test) : \cr \eqn{\chi^2 = (n.obs - 1 - (2 * p + 5)/6 - (2 * factors)/3)) * f }{chi^2 = (n.obs - 1 - (2 * p + 5)/6 - (2 * factors)/3)) * f } } \item{PVAL}{If n.obs > 0, then what is the probability of observing a chisquare this large or larger?} \item{Phi}{If oblique rotations (e.g,m using oblimin from the GPArotation package or promax) are requested, what is the interfactor correlation?} \item{communality.iterations}{The history of the communality estimates (For principal axis only.) Probably only useful for teaching what happens in the process of iterative fitting.} \item{residual}{The matrix of residual correlations after the factor model is applied. To display it conveniently, use the \code{\link{residuals}} command. } \item{chi}{When normal theory fails (e.g., in the case of non-positive definite matrices), it useful to examine the empirically derived \eqn{\chi^2}{chi^2} based upon the sum of the squared residuals * N. This will differ slightly from the MLE estimate which is based upon the fitting function rather than the actual residuals.} \item{rms}{This is the sum of the squared (off diagonal residuals) divided by the degrees of freedom. Comparable to an RMSEA which, because it is based upon \eqn{\chi^2}{chi^2}, requires the number of observations to be specified. The rms is an empirical value while the RMSEA is based upon normal theory and the non-central \eqn{\chi^2}{chi^2} distribution. That is to say, if the residuals are particularly non-normal, the rms value and the associated \eqn{\chi^2}{chi^2} and RMSEA can differ substantially. } \item{crms}{rms adjusted for degrees of freedom} \item{RMSEA}{The Root Mean Square Error of Approximation is based upon the non-central \eqn{\chi^2}{chi^2} distribution and the \eqn{\chi^2}{chi^2} estimate found from the MLE fitting function. With normal theory data, this is fine. But when the residuals are not distributed according to a noncentral \eqn{\chi^2}{chi^2}, this can give very strange values. (And thus the confidence intervals can not be calculated.) The RMSEA is a conventional index of goodness (badness) of fit but it is also useful to examine the actual rms values. } \item{TLI}{The Tucker Lewis Index of factoring reliability which is also known as the non-normed fit index. } \item{BIC}{Based upon \eqn{\chi^2}{chi^2} with the assumption of normal theory and using the \eqn{\chi^2}{chi^2} found using the objective function defined above. This is just \eqn{\chi^2 - 2 df}{chi^2 - 2 df}} \item{eBIC}{When normal theory fails (e.g., in the case of non-positive definite matrices), it useful to examine the empirically derived eBIC based upon the empirical \eqn{\chi^2}{chi^2} - 2 df. } \item{R2}{The multiple R square between the factors and factor score estimates, if they were to be found. (From Grice, 2001). Derived from R2 is is the minimum correlation between any two factor estimates = 2R2-1. } \item{r.scores}{The correlations of the factor score estimates using the specified model, if they were to be found. Comparing these correlations with that of the scores themselves will show, if an alternative estimate of factor scores is used (e.g., the tenBerge method), the problem of factor indeterminacy. For these correlations will not necessarily be the same. } \item{weights}{The beta weights to find the factor score estimates. These are also used by the \code{\link{predict.psych}} function to find predicted factor scores for new cases. These weights will depend upon the scoring method requested. } \item{scores}{The factor scores as requested. Note that these scores reflect the choice of the way scores should be estimated (see scores in the input). That is, simple regression ("Thurstone"), correlaton preserving ("tenBerge") as well as "Anderson" and "Bartlett" using the appropriate algorithms (see \code{\link{factor.scores}}). The correlation between factor score estimates (r.scores) is based upon using the regression/Thurstone approach. The actual correlation between scores will reflect the rotation algorithm chosen and may be found by correlating those scores. Although the scores are found by multiplying the standarized data by the weights matrix, this will not result in standard scores if using regression. } \item{valid}{The validity coffiecient of course coded (unit weighted) factor score estimates (From Grice, 2001)} \item{score.cor}{The correlation matrix of coarse coded (unit weighted) factor score estimates, if they were to be found, based upon the structure matrix rather than the weights or loadings matrix. } \item{rot.mat}{The rotation matrix as returned from GPArotation.} } \references{Gorsuch, Richard, (1983) Factor Analysis. Lawrence Erlebaum Associates. Grice, James W. (2001), Computing and evaluating factor scores. Psychological Methods, 6, 430-450 Harman, Harry and Jones, Wayne (1966) Factor analysis by minimizing residuals (minres), Psychometrika, 31, 3, 351-368. Hofmann, R. J. ( 1978 ) . Complexity and simplicity as objective indices descriptive of factor solutions. Multivariate Behavioral Research, 13, 247-250. Grieder, Silvia and Steiner, Markus. D. (2021) Algorithmic jingle jungle: A comparison of implementations of principal axis factoring and promax rotation in R and SPSS. Behavior Research Methods. Doi: 10.3758/s13428-021-01581 Kaiser, Henry F. and Caffrey, John. Alpha factor analysis, Psychometrika, (30) 1-14. Nguyen, H. V. & Waller, N. G. (in press). Local minima and factor rotations in exploratory factor analysis. Psychological Methods. Pettersson E, Turkheimer E. (2010) Item selection, evaluation, and simple structure in personality data. Journal of research in personality, 44(4), 407-420. Revelle, William. (in prep) An introduction to psychometric theory with applications in R. Springer. Working draft available at \url{https://personality-project.org/r/book/} Shapiro, A. and ten Berge, Jos M. F, (2002) Statistical inference of minimum rank factor analysis. Psychometika, (67) 79-84. ten Berge, Jos M. F. and Kiers, Henk A. L. (1991). A numerical approach to the approximate and the exact minimum rank of a covariance matrix. Psychometrika, (56) 309-315. } \author{ William Revelle } \note{Thanks to Erich Studerus for some very helpful suggestions about various rotation and factor scoring algorithms, and to Gumundur Arnkelsson for suggestions about factor scores for singular matrices. The fac function is the original fa function which is now called by fa repeatedly to get confidence intervals. SPSS will sometimes use a Kaiser normalization before rotating. This will lead to different solutions than reported here. To get the Kaiser normalized loadings, use \code{\link{kaiser}}. The communality for a variable is the amount of variance accounted for by all of the factors. That is to say, for orthogonal factors, it is the sum of the squared factor loadings (rowwise). The communality is insensitive to rotation. However, if an oblique solution is found, then the communality is not the sum of squared pattern coefficients. In both cases (oblique or orthogonal) the communality is the diagonal of the reproduced correlation matrix where \eqn{_nR_n = _{n}P_{k k}\Phi_{k k}P_n'}{nRn = nPk k\Phi k kPn' } where P is the pattern matrix and \eqn{\Phi} is the factor intercorrelation matrix. This is the same, of course to multiplying the pattern by the structure: \eqn{R = P S'}{R = PS'} where the Structure matrix is \eqn{S = \Phi P}{S = Phi P}. Similarly, the eigen values are the diagonal of the product \eqn{ _k\Phi_{kk}P'_{nn}P_{k} }{\Phi_{k k}P'_nnP_k}. A frequently asked question is why are the factor names of the rotated solution not in ascending order? That is, for example, if factoring the 25 items of the bfi, the factor names are MR2 MR3 MR5 MR1 MR4, rather than the seemingly more logical "MR1" "MR2" "MR3" "MR4" "MR5". This is for pedagogical reasons, in that factors as extracted are orthogonal and are in order of amount of variance accounted for. But when rotated (orthogonally) or transformed (obliquely) the simple structure solution does not preserve that order. The factors are still ordered according to variance accounted for, but because rotation changes how much variance each factor explains, the order may not the be the same as the original order. The factor names are, of course, arbitrary, and are kept with the original names to show the effect of rotation/transformation. To give them names associated with their ordinal position, simply paste("F", 1:nf, sep="") where nf is the number of factors. See the last example. The print function for the fa output will return (invisibly) an object (Vaccounted) that matches the printed output for the variance accounted for by each factor, as well as the cumulative variance, and the percentage of variance accounted for by each factor. Correction to documentation: as of September, 2014, the oblique.scores option is correctly explained. (It had been backwards.) The default (oblique.scores=FALSE) finds scores based upon the Structure matrix, while oblique.scores=TRUE finds them based upon the pattern matrix. The latter case matches factanal. This error was detected by Mark Seeto. If the raw data are factored, factors scores are estimated. By default this will be done using 'regression' but alternatives are available. Although the scores are found by multiplying the standardized data by the weights, if using regression, the resulting factor scores will not necessarily have unit variance. In the case of missing data for some items, the factor scores will return NA for any case where any item is missing. Specifying missing=TRUE will return factor scores for all cases using the imputation option chosen. The minimum residual solution is done by finding those communalities that will minimize the off diagonal residual. The uls solution finds those communalities that minimize the total residuals. The minres solution has been modified (April, 2107) following suggestions by Hao Wu. Although the fitting function was the minimal residual, the first derivative of the fitting function was incorrect. This has now been modified so that the results match those of SPSS and CEFA. The prior solutions are still available using fm="old.min". Alpha factoring was added in August, 2017 to add to the numerous alternative models of factoring. A few more lines of output were added in August 2017 to show the measures of factor adequacy for different rotations. This had been available in the results from \code{\link{factor.scores}} but now is added to the fa output. For a comparison of the \code{\link{fa}} to factor analysis using SPSS, see Grieder and Steiner (2021). Some correlation matrices that arise from using pairwise deletion or from tetrachoric or polychoric matrices will not be proper. That is, they will not be positive semi-definite (all eigen values >= 0). The \code{\link{cor.smooth}} function will adjust correlation matrices (smooth them) by making all negative eigen values slightly greater than 0, rescaling the other eigen values to sum to the number of variables, and then recreating the correlation matrix. See \code{\link{cor.smooth}} for an example of this problem using the \code{\link[psychTools]{burt}} data set. One reason for this problem when using tetrachorics or polychorics seems to be the adjustment for continuity. Setting correct=0 turns this off and seems to produce more proper matrices. Although both fm ="pa" or fm="minres" will work with these ``improper" matrices, the goodness of fit tests do not. Thus, by default, calls to fa.stats will pass the smooth parameter as TRUE. This may be prevented by forcing smooth=FALSE. Npt smoothing does not affect the pa or minres solution, but will affect the goodness of fit statistics slightly. } \seealso{ \code{\link{principal}} for principal components analysis (PCA). PCA will give very similar solutions to factor analysis when there are many variables. The differences become more salient as the number variables decrease. The PCA and FA models are actually very different and should not be confused. One is a model of the observed variables, the other is a model of latent variables. Although some commercial packages (e.g., SPSS and SAS) refer to both as factor models, they are not. It is incorrect to report doing a factor analysis using principal components. \code{\link{irt.fa}} for Item Response Theory analyses using factor analysis, using the two parameter IRT equivalent of loadings and difficulties. \code{\link{fa.random}} applies a random intercepts model by removing the mean score for each subject prior to factoring. \code{\link{VSS}} will produce the Very Simple Structure (VSS) and MAP criteria for the number of factors, \code{\link{nfactors}} to compare many different factor criteria. \code{\link{ICLUST}} will do a hierarchical cluster analysis alternative to factor analysis or principal components analysis. \code{\link{factor.scores}} to find factor scores with alternative options. \code{\link{predict.psych}} to find predicted scores based upon new data, \code{\link{fa.extension}} to extend the factor solution to new variables, \code{\link{omega}} for hierarchical factor analysis with one general factor. \code{\link{fa.multi}} for hierarchical factor analysis with an arbitrary number of 2nd order factors. \code{\link{fa.sort}} will sort the factor loadings into echelon form. \code{\link{fa.organize}} will reorganize the factor pattern matrix into any arbitrary order of factors and items. \code{\link{KMO}} and \code{\link{cortest.bartlett}} for various tests that some people like. \code{\link{factor2cluster}} will prepare unit weighted scoring keys of the factors that can be used with \code{\link{scoreItems}}. \code{\link{fa.lookup}} will print the factor analysis loadings matrix along with the item ``content" taken from a dictionary of items. This is useful when examining the meaning of the factors. \code{\link{anova.psych}} allows for testing the difference between two (presumably nested) factor models . \code{\link{bassAckward}} will perform repeated factorings and organize them in a top-down structure suggested by Goldberg (2006) and Waller (2007). } \examples{ #using the Harman 24 mental tests, compare a principal factor with a principal components solution pc <- principal(Harman74.cor$cov,4,rotate="varimax") #principal components pa <- fa(Harman74.cor$cov,4,fm="pa" ,rotate="varimax") #principal axis uls <- fa(Harman74.cor$cov,4,rotate="varimax") #unweighted least squares is minres wls <- fa(Harman74.cor$cov,4,fm="wls") #weighted least squares #to show the loadings sorted by absolute value print(uls,sort=TRUE) #then compare with a maximum likelihood solution using factanal mle <- factanal(covmat=Harman74.cor$cov,factors=4) factor.congruence(list(mle,pa,pc,uls,wls)) #note that the order of factors and the sign of some of factors may differ #finally, compare the unrotated factor, ml, uls, and wls solutions wls <- fa(Harman74.cor$cov,4,rotate="none",fm="wls") pa <- fa(Harman74.cor$cov,4,rotate="none",fm="pa") minres <- factanal(factors=4,covmat=Harman74.cor$cov,rotation="none") mle <- fa(Harman74.cor$cov,4,rotate="none",fm="mle") uls <- fa(Harman74.cor$cov,4,rotate="none",fm="uls") factor.congruence(list(minres,mle,pa,wls,uls)) #in particular, note the similarity of the mle and min res solutions #note that the order of factors and the sign of some of factors may differ #an example of where the ML and PA and MR models differ is found in Thurstone.33. #compare the first two factors with the 3 factor solution Thurstone.33 <- as.matrix(Thurstone.33) mle2 <- fa(Thurstone.33,2,rotate="none",fm="mle") mle3 <- fa(Thurstone.33,3 ,rotate="none",fm="mle") pa2 <- fa(Thurstone.33,2,rotate="none",fm="pa") pa3 <- fa(Thurstone.33,3,rotate="none",fm="pa") mr2 <- fa(Thurstone.33,2,rotate="none") mr3 <- fa(Thurstone.33,3,rotate="none") factor.congruence(list(mle2,mr2,pa2,mle3,pa3,mr3)) #f5 <- fa(psychTools::bfi[1:25],5) #f5 #names are not in ascending numerical order (see note) #colnames(f5$loadings) <- paste("F",1:5,sep="") #f5 #Get the variance accounted for object from the print function p <- print(mr3) round(p$Vaccounted,2) #pool data and fa the pooled result (not run) #datasets.list <- list(bfi[1:500,1:25],bfi[501:1000,1:25], # bfi[1001:1500,1:25],bfi[1501:2000,1:25],bfi[2001:2500,1:25]) #five different data sets #temp <- fa.pooled(datasets.list,5) #do 5 factor analyses, pool the results } \keyword{ multivariate } \keyword{ models}psych/man/misc.Rd0000644000176200001440000002532414545700005013403 0ustar liggesusers\name{psych.misc} \alias{psych.misc} \alias{misc} \alias{tableF} \alias{lowerCor} \alias{lowerMat} \alias{matMult} \alias{progressBar} \alias{reflect} \alias{shannon} \alias{test.all} \alias{cor2} \alias{levels2numeric} \alias{char2numeric} \alias{nchar2numeric} \alias{isCorrelation} \alias{isCovariance} \alias{fromTo} \alias{cs} \alias{acs} \alias{SAPAfy} \title{Miscellaneous helper functions for the psych package} \description{This is a set of minor, if not trivial, helper functions. lowerCor finds the correlation of x variables and then prints them using lowerMat which is a trivial, but useful, function to round off and print the lower triangle of a matrix. reflect reflects the output of a factor analysis or principal components analysis so that one or more factors is reflected. (Requested by Alexander Weiss.) progressBar prints out ... as a calling routine (e.g., \code{\link{tetrachoric}}) works through a tedious calculation. shannon finds the Shannon index (H) of diversity or of information. test.all tests all the examples in a package. best.items sorts a factor matrix for absolute values and displays the expanded items names. fa.lookup returns sorted factor analysis output with item labels. \code{\link{cor2}} correlates two data.frames (of equal length). levels2numeric and char2numeric convert dataframe columns that are categorical/levels to numeric values. } \usage{ psych.misc() lowerCor(x,digits=2,use="pairwise",method="pearson",minlength=5,cor="cor",show=TRUE) cor2(x,y,digits=2,use="pairwise",method="pearson",cor="cor",show=TRUE) lowerMat(R, digits = 2,minlength=5) matMult(x,y) #multiply two matrices with missing data tableF(x,y) reflect(f,flip=NULL) progressBar(value,max,label=NULL) shannon(x,correct=FALSE,base=2) test.all(pl,package="psych",dependencies = c("Depends", "Imports", "LinkingTo"),find=FALSE,skip=NULL) levels2numeric(x) char2numeric(x,flag=TRUE) nchar2numeric(x,flag=TRUE) isCorrelation(x,na.rm=FALSE) #test if an object is a symmetric matrix # with diagonals of 1 and all values between -1 and 1 isCovariance(x) #test if an object is a symmetric matrix fromTo(data,from,to=NULL) #convert character names to locations as specified in colnames #of data cs(...) #convert a list of text words to character vector acs(...) #convert a list of text words to a single string SAPAfy(x,y) #sample y columns from x, replace other columns with NA } \arguments{ \item{R}{A rectangular matrix or data frame (probably a correlation matrix)} \item{x}{A data matrix or data frame or a vector depending upon the function.} \item{y}{A data matrix or data frame or a vector} \item{f}{The object returned from either a factor analysis (fa) or a principal components analysis (principal) } \item{digits}{round to digits} \item{minlength}{Abbreviate to minlength in lowerCor} \item{show}{Display the correlations from lowerCor or cor2 (default is TRUE)} \item{use}{Should pairwise deletion be done, or one of the other options to cor} \item{na.rm}{Should we check for NA on the diagonal of a correlation matices} \item{method}{"pearson", "kendall", "spearman"} \item{cor}{defaults to the normal cor function, but can also do tetrachoric, polychoric or covariances (cov)} \item{value}{the current value of some looping variable} \item{max}{The maximum value the loop will achieve} \item{label}{what function is looping} \item{flip}{The factor or components to be reversed keyed (by factor number)} \item{flag}{flag=TRUE in char2numeric will flag (with *) those variables that had been numeric. This changes the variable name. flag = FALSE does not mark those variables.} \item{correct}{Correct for the maximum possible information in this item} \item{base}{What is the base for the log function (default=2, e implies base = exp(1))} \item{pl}{The name of a package (or list of packages) to be activated and then have all the examples tested.} \item{package}{Find the dependencies for this package, e.g., psych} \item{dependencies}{Which type of dependency to examine?} \item{find}{Look up the dependencies, and then test all of their examples} \item{skip}{Do not test these dependencies} \item{data}{A dataframe or matrix to choose from} \item{from}{select from column with name from to column with name to} \item{to}{select from column from to column to} \item{...}{Any string of legitimate objects} } \value{ \code{\link{tableF}} is fast alternative to the table function for creating two way tables of numeric variables. It does not have any of the elegant checks of the table function and thus is much faster. Used in the \code{\link{tetrachoric}} and \code{\link{polychoric}} functions to maximize speed. \code{\link{lowerCor}} Finds and prints (using \code{\link{lowerMat}}) the lower diagonal correlation matrix but returns (invisibly) the full correlation matrix found with the use, method and cor parameters. The default values are for pairwise deletion of variables, "pearson" correlations. Specify cor="tetrachoric", "polychoric", or "mixed" for alternatives. By default, digits is set to print to 2 decimal places. By default, it will change character variables to numeric and flag them. \code{\link{lowerMat}}Shows the lower triangle of a matrix, rounded to digits with titles abbreviated to digits + 3 \code{\link{progressBar}} Display a series of dots as we progress through a slow loop (removed from anything using multicores). \code{\link{tableF}} (for tableFast) is a cut down version of table that does no error checking, nor returns pretty output, but is significantly faster than table. It will just work on two integer vectors. This is used in polychoric an tetrachoric for about a 50\% speed improvement for large problems. \code{\link{shannon}} finds Shannon's H index of information. Used for estimating the complexity or diversity of the distribution of responses in a vector or matrix. \deqn{H = -\sum{p_i log(p_i) }} \code{\link{test.all}} allows one to test all the examples in specified package. This allows us to make sure that those examples work when other packages (e.g., psych) are also loaded. This is used when developing revisions to the psych package to make sure the the other packages work. Some packages will not work and/or crash the system (e.g., DeducerPlugInScaling requires Java and even with Java, crashes when loaded, even if psych is not there!). Alternatively, if testing a long list of dependencies, you can skip the first part by specifying them by name. \code{\link{cor2}} will find and display the correlations between two sets of variables, rounded to digits, using the other options. If x is a list of multiple sets (two or more), then all sets are correlated. \code{\link{levels2numeric}}converts character data with levels to numeric data. Used in the SAPA analyses where we code some variables, (e.g., gender, education) with character codes to help in the documentation of files, but want to do analyses of correlations with other categorical variables. \code{\link{char2numeric}}converts character data with levels to numeric data. Used for cases when data from questionnaires include the response categories rathere than numeric data. Unless the levels of the data are in meaningful order, the numeric results are not useful. Most useful if doing polychoric analyses. Note this is not suitable for recoding numeric data stored as characters, for it will force them to levels first. See \code{\link{nchar2numeric}}. Note that for very small data sets, because the recoding done by char2numeric is column wise, it might result in different numerical results for the same characters in different columns. Problems with \code{\link{char2numeric}} can be solved by using the \code{psychTools::recode} function. \code{\link{nchar2numeric}}converts numbers coded as characters (quoted) to numeric without forcing them to factors first. \code{\link{fromTo}} selects the columns in data from to (see the examples) \code{\link{cs}} concatenates strings without the need to identify variables by " ". } \details{ \code{\link{lowerCor}} prints out the lower off diagonal matrix rounded to digits with column names abbreviated to digits + 3 characters, but also returns the full and unrounded matrix. By default, it uses pairwise deletion of variables. It in turn calls \code{\link{lowerMat}} which does the pretty printing. It is important to remember to not call \code{\link{lowerCor}} when all you need is \code{\link{lowerMat}}! \code{\link{cs}} is a direct copy of the Cs function in the Hmisc package by Frank Harrell. Added to psych to avoid the overhead of the Hmisc package. } \seealso{\code{\link{corr.test}} to find correlations, count the pairwise occurrences, and to give significance tests for each correlation. \code{\link{r.test}} for a number of tests of correlations, including tests of the difference between correlations. \code{\link{lowerUpper}} will display the differences between two matrices.} \examples{ lowerMat(Thurstone) lb <- lowerCor(psychTools::bfi[1:10]) #finds and prints the lower correlation matrix, # returns the square matrix. lowerCor(psychTools::ability[,1:5]) #the Pearson correlations, compare with lowerCor(psychTools::ability[,1:5],cor="tetra") #fiml <- corFiml(psychTools::bfi[1:10]) #FIML correlations require lavaan package #lowerMat(fiml) #to get pretty output f3 <- fa(Thurstone,3) f3r <- reflect(f3,2) #reflect the second factor #find the complexity of the response patterns of the iqitems. round(shannon(psychTools::iqitems),2) #test.all('BinNor') #Does the BinNor package work when we are using other packages bestItems(lb,"A3",cut=.1,dictionary=psychTools::bfi.dictionary[1:2]) #to make this a latex table #df2latex(bestItems(lb,2,cut=.2)) # data(psychTools::bfi.dictionary) f2 <- fa(psychTools::bfi[1:10],2) fa.lookup(f2,psychTools::bfi.dictionary) sa1 <-sat.act[1:2] sa2 <- sat.act[3:4] sa3 <- sat.act[5:6] cor2(sa1,sa2) cor2(list(sa1,sa2)) #show within set and between set cors cor2(list(sa1,sa2,sa3)) lowerCor(fromTo(sat.act,"ACT","SATQ")) #show some correlations vect <- cs(ACT,SATQ) #skip the quotes vect #they are in this vector #to combine longer terms vect <- cs("Here is a longish",vector, that, we ,"want to combine", into, several) vect temp <- acs("Here is a longish",vector, that, we ,"want to combine", into, one) temp lowerCor(fromTo(sat.act,cs(ACT,SATQ))) lowerCor(fromTo(bfi,cs(A3,C4)),cor="poly") #compare with lowerCor(fromTo(bfi,cs(A3,C4))) set.seed(42) temp <- SAPAfy(bfi[1:10],3) #30 % sample from bfi f2 <- fa(bfi[1:10],2) f2s <- fa(temp,2) fa.congruence(f2s,f2) #the two factor structure are almost identical #although the scores are not identical cor2(f2$scores, f2s$scores) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{multivariate} psych/man/mat.sort.Rd0000644000176200001440000000253013377010614014213 0ustar liggesusers\name{mat.sort} \Rdversion{1.1} \alias{mat.sort} \alias{matSort} \title{Sort the elements of a correlation matrix to reflect factor loadings} \description{To see the structure of a correlation matrix, it is helpful to organize the items so that the similar items are grouped together. One such grouping technique is factor analysis. mat.sort will sort the items by a factor model (if specified), or any other order, or by the loadings on the first factor (if unspecified) } \usage{ mat.sort(m, f = NULL) matSort(m, f = NULL) } \arguments{ \item{m}{A correlation matrix } \item{f}{A factor analysis output (i.e., one with a loadings matrix) or a matrix of weights } } \details{The factor analysis output is sorted by size of the largest factor loading for each variable and then the matrix items are organized by those loadings. The default is to sort by the loadings on the first factor. Alternatives allow for ordering based upon any vector or matrix. } \value{ A sorted correlation matrix, suitable for showing with \code{\link{corPlot}}. } \author{William Revelle} \seealso{ \code{\link{fa}}, \code{\link{corPlot}} } \examples{data(Bechtoldt.1) sorted <- mat.sort(Bechtoldt.1,fa(Bechtoldt.1,5)) corPlot(sorted,xlas=2) #vertical xaxis names } \keyword{ multivariate}% at least one, from doc/KEYWORDS \keyword{models }% __ONLY ONE__ keyword per line psych/man/test.irt.Rd0000644000176200001440000000432613032566146014231 0ustar liggesusers\name{test.irt} \alias{test.irt} %- Also NEED an '\alias' for EACH other topic documented here. \title{A simple demonstration (and test) of various IRT scoring algorthims. } \description{ Item Response Theory provides a number of alternative ways of estimating latent scores. Here we compare 6 different ways to estimate the latent variable associated with a pattern of responses. Originally developed as a test for scoreIrt, but perhaps useful for demonstration purposes. Items are simulated using \code{\link{sim.irt}} and then scored using factor scores from \code{\link{factor.scores}} using statistics found using \code{\link{irt.fa}}, simple weighted models for 1 and 2 PL and 2 PN. Results show almost perfect agreement with estimates from MIRT and ltm for the dichotomous case and with MIRT for the polytomous case. (Results from ltm are unstable for the polytomous case, sometimes agreeing with \code{\link{scoreIrt}} and MIRT, sometimes being much worse.) } \usage{ test.irt(nvar = 9, n.obs = 1000, mod = "logistic",type="tetra", low = -3, high = 3, seed = NULL) } \arguments{ \item{nvar}{Number of variables to create (simulate) and score} \item{n.obs}{Number of simulated subjects} \item{mod}{"logistic" or "normal" theory data are generated} \item{type}{"tetra" for dichotomous, "poly" for polytomous} \item{low}{items range from low to high} \item{high}{items range from low to high} \item{seed}{Set the random number seed using some non-nul value. Otherwise, use the existing sequence of random numbers} } \details{n.obs observations (0/1) on nvar variables are simulated using either a logistic or normal theory model. Then, a number of different scoring algorithms are applied and shown graphically. Requires the ltm package to be installed to compare ltm scores. } \value{ A dataframe of scores as well as the generating theta true score. A graphic display of the correlations is also shown.} \author{William Revelle} \seealso{ \code{\link{scoreIrt}},\code{\link{irt.fa}} } \examples{ #not run #test.irt(9,1000) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{multivariate }% use one of RShowDoc("KEYWORDS") \keyword{models }% __ONLY ONE__ keyword per line psych/man/phi2poly.Rd0000644000176200001440000000352312436644351014223 0ustar liggesusers\name{phi2tetra} \alias{phi2tetra} \alias{phi2poly} \title{ Convert a phi coefficient to a tetrachoric correlation } \description{Given a phi coefficient (a Pearson r calculated on two dichotomous variables), and the marginal frequencies (in percentages), what is the corresponding estimate of the tetrachoric correlation? Given a two x two table of counts \cr \tabular{lll}{ \tab a \tab b \cr \tab c \tab d \cr } The phi coefficient is (a - (a+b)*(a+c))/sqrt((a+b)(a+c)(b+d)(c+c)). This function reproduces the cell entries for specified marginals and then calls the tetrachoric function. (Which was originally based upon John Fox's polychor function.) The phi2poly name will become deprecated in the future. } \usage{ phi2tetra(ph,m,n=NULL,correct=TRUE) phi2poly(ph,cp,cc,n=NULL,correct=TRUE) #deprecated } %- maybe also 'usage' for other objects documented here. \arguments{ \item{ph}{phi } \item{m}{a vector of the selection ratio and probability of criterion. In the case where ph is a matrix, m is a vector of the frequencies of the selected cases} \item{correct}{When finding tetrachoric correlations, should we correct for continuity for small marginals. See \code{\link{tetrachoric}} for a discussion.} \item{n}{If the marginals are given as frequencies, what was the total number of cases?} \item{cp}{ probability of the predictor -- the so called selection ratio } \item{cc}{probability of the criterion -- the so called success rate. } } \details{used to require the mvtnorm package but this has been replaced with mnormt } \value{a tetrachoric correlation } \author{ William Revelle} \seealso{ \code{\link{tetrachoric}}, \code{\link{Yule2phi.matrix}}, \code{\link{phi2poly.matrix}} } \examples{ phi2tetra(.3,c(.5,.5)) #phi2poly(.3,.3,.7) } \keyword{ models }% at least one, from doc/KEYWORDS \keyword{ models }% __ONLY ONE__ keyword per line psych/man/cor.smooth.Rd0000644000176200001440000001003413464055022014534 0ustar liggesusers\name{cor.smooth} \alias{cor.smooth} \alias{cor.smoother} \title{Smooth a non-positive definite correlation matrix to make it positive definite} \description{Factor analysis requires positive definite correlation matrices. Unfortunately, with pairwise deletion of missing data or if using \code{\link{tetrachoric}} or \code{\link{polychoric}} correlations, not all correlation matrices are positive definite. cor.smooth does a eigenvector (principal components) smoothing. Negative eigen values are replaced with 100 * eig.tol, the matrix is reproduced and forced to a correlation matrix using cov2cor. } \usage{ cor.smooth(x,eig.tol=10^-12) cor.smoother(x,cut=.01) } \arguments{ \item{x}{A correlation matrix or a raw data matrix.} \item{eig.tol}{the minimum acceptable eigenvalue}. \item{cut}{Report all abs(residuals) > cut} } \details{The smoothing is done by eigen value decomposition. eigen values < eig.tol are changed to 100 * eig.tol. The positive eigen values are rescaled to sum to the number of items. The matrix is recomputed (eigen.vectors \%*\% diag(eigen.values) \%*\% t(eigen.vectors) and forced to a correlation matrix using cov2cor. (See Bock, Gibbons and Muraki, 1988 and Wothke, 1993). This does not implement the Knol and ten Berge (1989) solution, nor do nearcor and posdefify in sfmsmisc, not does nearPD in Matrix. As Martin Maechler puts it in the posdedify function, "there are more sophisticated algorithms to solve this and related problems." cor.smoother examines all of nvar minors of rank nvar-1 by systematically dropping one variable at a time and finding the eigen value decomposition. It reports those variables, which, when dropped, produce a positive definite matrix. It also reports the number of negative eigenvalues when each variable is dropped. Finally, it compares the original correlation matrix to the smoothed correlation matrix and reports those items with absolute deviations great than cut. These are all hints as to what might be wrong with a correlation matrix. } \value{The smoothed matrix with a warning reporting that smoothing was necessary (if smoothing was in fact necessary). } \references{ R. Darrell Bock, Robert Gibbons and Eiji Muraki (1988) Full-Information Item Factor Analysis. Applied Psychological Measurement, 12 (3), 261-280. Werner Wothke (1993), Nonpositive definite matrices in structural modeling. In Kenneth A. Bollen and J. Scott Long (Editors),Testing structural equation models, Sage Publications, Newbury Park. D.L. Knol and JMF ten Berge (1989) Least squares approximation of an improper correlation matrix by a proper one. Psychometrika, 54, 53-61. } \author{William Revelle} \seealso{ \code{\link{tetrachoric}}, \code{\link{polychoric}}, \code{\link{fa}} and \code{\link{irt.fa}}, and the \code{\link[psychTools]{burt}} data set. See also nearcor and posdefify in the sfsmisc package and nearPD in the Matrix package. } \examples{ burt <- psychTools::burt bs <- cor.smooth(psychTools::burt) #burt data set is not positive definite plot(burt[lower.tri(burt)],bs[lower.tri(bs)],ylab="smoothed values",xlab="original values") abline(0,1,lty="dashed") round(burt - bs,3) fa(burt,2) #this throws a warning that the matrix yields an improper solution #Smoothing first throws a warning that the matrix was improper, #but produces a better solution fa(cor.smooth(burt),2) #this next example is a correlation matrix from DeLeuw used as an example #in Knol and ten Berge. #the example is also used in the nearcor documentation cat("pr is the example matrix used in Knol DL, ten Berge (1989)\n") pr <- matrix(c(1, 0.477, 0.644, 0.478, 0.651, 0.826, 0.477, 1, 0.516, 0.233, 0.682, 0.75, 0.644, 0.516, 1, 0.599, 0.581, 0.742, 0.478, 0.233, 0.599, 1, 0.741, 0.8, 0.651, 0.682, 0.581, 0.741, 1, 0.798, 0.826, 0.75, 0.742, 0.8, 0.798, 1), nrow = 6, ncol = 6) sm <- cor.smooth(pr) resid <- pr - sm # several goodness of fit tests # from Knol and ten Berge tr(resid \%*\% t(resid)) /2 # from nearPD sum(resid^2)/2 } \keyword{ multivariate } \keyword{ models} psych/man/make.keys.Rd0000755000176200001440000001345414052506705014347 0ustar liggesusers\name{make.keys} \alias{make.keys} \alias{keys2list} \alias{selectFromKeys} \alias{makePositiveKeys} \title{ Create a keys matrix for use by score.items or cluster.cor} \description{ When scoring items by forming composite scales either from the raw data using \code{\link{scoreItems}} or from the correlation matrix using \code{\link{cluster.cor}}, it used to be necessary to create a keys matrix. This is no longer necessary as most of the scoring functions will directly use a keys list. \code{\link{make.keys}} is just a short cut for creating a keys matrix. The keys matrix is a nvar x nscales matrix of -1,0, 1 and defines the membership for each scale. Items can be specified by location or by name. } \usage{ make.keys(nvars, keys.list, item.labels = NULL, key.labels = NULL) keys2list(keys,sign=TRUE) selectFromKeys(keys.list) makePositiveKeys(keys.list,sign=FALSE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{nvars}{Number of variables items to be scored, or the name of the data.frame/matrix to be scored} \item{keys.list}{ A list of the scoring keys, one element for each scale} \item{item.labels}{ Typically, just the colnames of the items data matrix. } \item{key.labels}{ Labels for the scales can be specified here, or in the key.list } \item{keys}{A keys matrix returned from make.keys} \item{sign}{if TRUE, prefix negatively keyed items with - (e.g., ``-E2")} } \details{The easiest way to prepare keys for \code{\link{scoreItems}}, \code{\link{scoreOverlap}}, \code{\link{scoreIrt.1pl}}, or \code{\link{scoreIrt.2pl}} is to specify a keys.list. This is just a list specifying the name of the scales to be scores and the direction of the items to be used. In earlier versions (prior to 1.6.9) keys were formed as a matrix of -1, 0, and 1s for all the items using make.keys. This is no longer necessary, but make.keys is kept for compatibility with earlier versions. There are three ways to create keys for the \code{\link{scoreItems}}, \code{\link{scoreOverlap}}, \code{\link{scoreIrt.1pl}}, or \code{\link{scoreIrt.2pl}} functions. One is to laboriously do it in a spreadsheet and then copy them into R. The other is to just specify them by item number in a list. \code{\link{make.keys}} allows one to specify items by name or by location or a mixture of both. \code{\link{keys2list}} reverses the \code{\link{make.keys}} process and returns a list of scoring keys with the item names for each item to be keyed. If sign=FALSE, this is just a list of the items to be scored. (Useful for \code{\link{scoreIrt.2pl}} \code{\link{selectFromKeys}} will strip the signs from a keys.list and create a vector of item names (deleting duplicates) associated with those keys. This is useful if using a keys.list to define scales and then just selecting those items that are in subset of the keys.list. This is now done in the scoring functions in the interest of speed. Since these scoring functions \code{\link{scoreItems}}, \code{\link{scoreOverlap}}, \code{\link{scoreIrt.1pl}}, or \code{\link{scoreIrt.2pl}} can now (> version 1.6.9) just take a keys.list as input, make.keys is not as important, but is kept for documentation purposes. To address items by name it is necessary to specify item names, either by using the item.labels value, or by putting the name of the data file or the colnames of the data file to be scored into the first (nvars) position. If specifying by number (location), then nvars is the total number of items in the object to be scored, not just the number of items used. See the examples for the various options. Note that make.keys was revised in Sept, 2013 to allow for keying by name. It is also possible to do several make.keys operations and then combine them using \code{\link{superMatrix}}. The alternative, if using the keys.list features is just to concatenate them. makePositiveKeys is useful for taking subsets of keys (e.g. from \code{\link{bestScales}} )and create separate keys for the positively and negatively keyed items. } \value{ \item{keys }{a nvars x nkeys matrix of -1, 0, or 1s describing how to score each scale. nkeys is the length of the keys.list} } \seealso{ \code{\link{scoreItems}}, \code{\link{scoreOverlap}}, \code{\link{cluster.cor}} \code{\link{superMatrix}} } \examples{ data(attitude) #specify the items by location key.list <- list(all=c(1,2,3,4,-5,6,7), first=c(1,2,3), last=c(4,5,6,7)) keys <- make.keys(7,key.list,item.labels = colnames(attitude)) keys #now, undo this new.keys.list <- keys2list(keys) #note, these are now given the variable names select <- selectFromKeys(key.list) #scores <- score.items(keys,attitude) #scores # data(psychTools::bfi) #first create the keys by location (the conventional way) keys.list <- list(agree=c(-1,2:5),conscientious=c(6:8,-9,-10), extraversion=c(-11,-12,13:15),neuroticism=c(16:20),openness = c(21,-22,23,24,-25)) keys <- make.keys(25,keys.list,item.labels=colnames(psychTools::bfi)[1:25]) new.keys.list <- keys2list(keys) #these will be in the form of variable names #alternatively, create by a mixture of names and locations keys.list <- list(agree=c("-A1","A2","A3","A4","A5"), conscientious=c("C1","C2","C2","-C4","-C5"),extraversion=c("-E1","-E2","E3","E4","E5"), neuroticism=c(16:20),openness = c(21,-22,23,24,-25)) keys <- make.keys(psychTools::bfi, keys.list) #specify the data file to be scored (bfi) #or keys <- make.keys(colnames(psychTools::bfi),keys.list) #specify the names of the variables #to be used #or #specify the number of variables to be scored and their names in all cases keys <- make.keys(28,keys.list,colnames(psychTools::bfi)) scores <- scoreItems(keys,psychTools::bfi) summary(scores) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{ multivariate } \keyword{models}psych/man/winsor.Rd0000644000176200001440000000366512334454040013774 0ustar liggesusers\name{winsor} \alias{winsor} \alias{winsor.mean} \alias{winsor.means} \alias{winsor.sd} \alias{winsor.var} \title{Find the Winsorized scores, means, sds or variances for a vector, matrix, or data.frame } \description{Among the robust estimates of central tendency are trimmed means and Winsorized means. This function finds the Winsorized scores. The top and bottom trim values are given values of the trimmed and 1- trimmed quantiles. Then means, sds, and variances are found. } \usage{ winsor(x, trim = 0.2, na.rm = TRUE) winsor.mean(x, trim = 0.2, na.rm = TRUE) winsor.means(x, trim = 0.2, na.rm = TRUE) winsor.sd(x, trim = 0.2, na.rm = TRUE) winsor.var(x, trim = 0.2, na.rm = TRUE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{A data vector, matrix or data frame} \item{trim}{Percentage of data to move from the top and bottom of the distributions} \item{na.rm}{Missing data are removed } } \details{Among the many robust estimates of central tendency, some recommend the Winsorized mean. Rather than just dropping the top and bottom trim percent, these extreme values are replaced with values at the trim and 1- trim quantiles. } \value{A scalar or vector of winsorized scores or winsorized means, sds, or variances (depending upon the call). } \references{Wilcox, Rand R. (2005) Introduction to robust estimation and hypothesis testing. Elsevier/Academic Press. Amsterdam ; Boston. } \author{William Revelle with modifications suggested by Joe Paxton and a further correction added (January, 2009) to preserve the original order for the winsor case.} \seealso{ \code{\link{interp.median}} } \examples{ data(sat.act) winsor.means(sat.act) #compare with the means of the winsorized scores y <- winsor(sat.act) describe(y) xy <- data.frame(sat.act,y) #pairs.panels(xy) #to see the effect of winsorizing x <- matrix(1:100,ncol=5) winsor(x) winsor.means(x) y <- 1:11 winsor(y,trim=.5) } \keyword{univar} psych/man/bfi.Rd0000644000176200001440000001376313715125450013217 0ustar liggesusers\name{bfi} \alias{bfi} \alias{bfi.keys} \docType{data} \title{25 Personality items representing 5 factors} \description{25 personality self report items taken from the International Personality Item Pool (ipip.ori.org) were included as part of the Synthetic Aperture Personality Assessment (SAPA) web based personality assessment project. The data from 2800 subjects are included here as a demonstration set for scale construction, factor analysis, and Item Response Theory analysis. Three additional demographic variables (sex, education, and age) are also included. This data set is deprecated and users are encouraged to use \code{\link[psychTools]{bfi}}. } \usage{data(bfi) } \format{ A data frame with 2800 observations on the following 28 variables. (The q numbers are the SAPA item numbers). \describe{ \item{\code{A1}}{Am indifferent to the feelings of others. (q_146)} \item{\code{A2}}{Inquire about others' well-being. (q_1162)} \item{\code{A3}}{Know how to comfort others. (q_1206) } \item{\code{A4}}{Love children. (q_1364)} \item{\code{A5}}{Make people feel at ease. (q_1419)} \item{\code{C1}}{Am exacting in my work. (q_124)} \item{\code{C2}}{Continue until everything is perfect. (q_530)} \item{\code{C3}}{Do things according to a plan. (q_619)} \item{\code{C4}}{Do things in a half-way manner. (q_626)} \item{\code{C5}}{Waste my time. (q_1949)} \item{\code{E1}}{Don't talk a lot. (q_712)} \item{\code{E2}}{Find it difficult to approach others. (q_901)} \item{\code{E3}}{Know how to captivate people. (q_1205)} \item{\code{E4}}{Make friends easily. (q_1410)} \item{\code{E5}}{Take charge. (q_1768)} \item{\code{N1}}{Get angry easily. (q_952)} \item{\code{N2}}{Get irritated easily. (q_974)} \item{\code{N3}}{Have frequent mood swings. (q_1099} \item{\code{N4}}{Often feel blue. (q_1479)} \item{\code{N5}}{Panic easily. (q_1505)} \item{\code{O1}}{Am full of ideas. (q_128)} \item{\code{O2}}{Avoid difficult reading material.(q_316)} \item{\code{O3}}{Carry the conversation to a higher level. (q_492)} \item{\code{O4}}{Spend time reflecting on things. (q_1738)} \item{\code{O5}}{Will not probe deeply into a subject. (q_1964)} \item{\code{gender}}{Males = 1, Females =2} \item{\code{education}}{1 = HS, 2 = finished HS, 3 = some college, 4 = college graduate 5 = graduate degree} \item{\code{age}}{age in years} } } \details{ This data set is deprecated and users are encouraged to use \code{\link[psychTools]{bfi}}.It is kept here backward compatability for one more release. The first 25 items are organized by five putative factors: Agreeableness, Conscientiousness, Extraversion, Neuroticism, and Opennness. The scoring key is created using \code{\link{make.keys}}, the scores are found using \code{\link{score.items}}. These five factors are a useful example of using \code{\link{irt.fa}} to do Item Response Theory based latent factor analysis of the \code{\link{polychoric}} correlation matrix. The endorsement plots for each item, as well as the item information functions reveal that the items differ in their quality. The item data were collected using a 6 point response scale: 1 Very Inaccurate 2 Moderately Inaccurate 3 Slightly Inaccurate 4 Slightly Accurate 5 Moderately Accurate 6 Very Accurate as part of the Synthetic Apeture Personality Assessment (SAPA \url{https://www.sapa-project.org/}) project. To see an example of the data collection technique, visit \url{https://www.sapa-project.org/} or the International Cognitive Ability Resource at \url{https://icar-project.org/}. The items given were sampled from the International Personality Item Pool of Lewis Goldberg using the sampling technique of SAPA. This is a sample data set taken from the much larger SAPA data bank. } \source{The items are from the ipip (Goldberg, 1999). The data are from the SAPA project (Revelle, Wilt and Rosenthal, 2010) , collected Spring, 2010 ( \url{https://www.sapa-project.org/}). } \references{Goldberg, L.R. (1999) A broad-bandwidth, public domain, personality inventory measuring the lower-level facets of several five-factor models. In Mervielde, I. and Deary, I. and De Fruyt, F. and Ostendorf, F. (eds) Personality psychology in Europe. 7. Tilburg University Press. Tilburg, The Netherlands. Revelle, W., Wilt, J., and Rosenthal, A. (2010) Individual Differences in Cognition: New Methods for examining the Personality-Cognition Link In Gruszka, A. and Matthews, G. and Szymura, B. (Eds.) Handbook of Individual Differences in Cognition: Attention, Memory and Executive Control, Springer. Revelle, W, Condon, D.M., Wilt, J., French, J.A., Brown, A., and Elleman, L.G. (2016) Web and phone based data collection using planned missing designs. In Fielding, N.G., Lee, R.M. and Blank, G. (Eds). SAGE Handbook of Online Research Methods (2nd Ed), Sage Publcations. } \seealso{\code{\link{bi.bars}} to show the data by age and gender, \code{\link{irt.fa}} for item factor analysis applying the irt model.} \note{ This data set is deprecated and users are encouraged to use \code{\link[psychTools]{bfi}}.It is kept here backward compatability for one more release. The bfi data set and items should not be confused with the BFI (Big Five Inventory) of Oliver John and colleagues (John, O. P., Donahue, E. M., & Kentle, R. L. (1991). The Big Five Inventory--Versions 4a and 54. Berkeley, CA: University of California,Berkeley, Institute of Personality and Social Research.) } \examples{ data(bfi) psych::describe(bfi) # create the bfi.keys (actually already saved in the data file) keys <- list(agree=c("-A1","A2","A3","A4","A5"),conscientious=c("C1","C2","C3","-C4","-C5"), extraversion=c("-E1","-E2","E3","E4","E5"),neuroticism=c("N1","N2","N3","N4","N5"), openness = c("O1","-O2","O3","O4","-O5")) scores <- psych::scoreItems(keys,bfi,min=1,max=6) #specify the minimum and maximum values scores #show the use of the fa.lookup with a dictionary #psych::keys.lookup(bfi.keys,bfi.dictionary[,1:4]) #deprecated -- use psychTools } \keyword{datasets} psych/man/multilevel.reliability.Rd0000644000176200001440000003346514524267320017154 0ustar liggesusers\name{multilevel.reliability} \alias{mlr} \alias{multilevel.reliability} \alias{mlArrange} \alias{mlPlot} \title{Find and plot various reliability/gneralizability coefficients for multilevel data } \description{ Various indicators of reliability of multilevel data (e.g., items over time nested within subjects) may be found using generalizability theory. A basic three way anova is applied to the data from which variance components are extracted. Random effects for a nested design are found by lme. These are, in turn, converted to several reliability/generalizability coefficients. An optional call to lme4 to use lmer may be used for unbalanced designs with missing data. mlArrange is a helper function to convert wide to long format. Data can be rearranged from wide to long format, and multiple lattice plots of observations overtime for multiple variables and multiple subjects are created. } \usage{ mlr(x, grp = "id", Time = "time", items = c(3:5),alpha=TRUE,icc=FALSE, aov=TRUE, lmer=FALSE,lme = TRUE,long=FALSE,values=NA,na.action="na.omit",plot=FALSE, main="Lattice Plot by subjects over time") mlArrange(x, grp = "id", Time = "time", items = c(3:5),extra=NULL) mlPlot(x, grp = "id", Time = "time", items = c(3:5),extra=NULL, col=c("blue","red","black","grey"),type="b", main="Lattice Plot by subjects over time",...) multilevel.reliability(x, grp = "id", Time = "time", items = c(3:5),alpha=TRUE,icc=FALSE, aov=TRUE,lmer=FALSE,lme = TRUE,long=FALSE,values=NA,na.action="na.omit", plot=FALSE,main="Lattice Plot by subjects over time") #alias for mlr } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{A data frame with persons, time, and items.} \item{grp}{Which variable specifies people (groups)} \item{Time}{Which variable specifies the temporal sequence?} \item{items}{Which items should be scored? Note that if there are multiple scales, just specify the items on one scale at a time. An item to be reversed scored can be specified by a minus sign. If long format, this is the column specifying item number. } \item{alpha}{If TRUE, report alphas for every subject (default)} \item{icc}{If TRUE, find ICCs for each person -- can take a while} \item{aov}{if FALSE, and if icc is FALSE, then just draw the within subject plots} \item{lmer}{Should we use the lme4 package and lmer or just do the ANOVA? Requires the lme4 package to be installed. Necessary to do crossed designs with missing data but takes a very long time.} \item{lme}{If TRUE, will find the nested components of variance. Relatively fast.} \item{long}{Are the data in wide (default) or long format.} \item{values}{If the data are in long format, which column name (number) has the values to be analyzed?} \item{na.action}{How to handle missing data. Passed to the lme function. } \item{plot}{If TRUE, show a lattice plot of the data by subject} \item{extra}{Names or locations of extra columns to include in the long output. These will be carried over from the wide form and duplicated for all items. See example.} \item{col}{Color for the lines in mlPlot. Note that items are categorical and thus drawn in alphabetical order. Order the colors appropriately.} \item{type}{The standard type for lines (p,l, b)} \item{main}{The main title for the plot (if drawn)} \item{...}{Other parameters to pass to xyplot} } \details{ Classical reliabiiity theory estimates the amount of variance in a set of observations due to a true score that varies over subjects. Generalizability theory extends this model to include other sources of variance, specifically, time. The classic studies using this approach are people measured over multiple time points with multiple items. Then the question is, how stable are various individual differences. Intraclass correlations (ICC) are found for each subject over items, and for each subject over time. Alpha reliabilities are found for each subject for the items across time. More importantly, components of variance for people, items, time, and their interactions are found either by classical analysis of variance (aov) or by multilevel mixed effect modeling (lme). These are then used to form several different estimates of generalizability. Very thoughtful discussions of these procedure may be found in chapters by Shrout and Lane. The variance components are the Between Person Variance \eqn{\sigma^2_P}, the variance between items \eqn{\sigma^2_I}, over time \eqn{\sigma^2_T}, and their interactions. Then, \eqn{RKF} is the reliability of average of all ratings across all items and times (Fixed time effects). (Shrout and Lane, Equation 6): \deqn{R_{kF} = \frac{\sigma^2_P + \sigma^2_{PI}/n.I}{\sigma^2_P + \sigma^2_{PI}/n.I + \sigma^2_e/(n.I n.P}}{Rkf = (\sigma^2_P + \sigma^2_{PI}/n.I)/(\sigma^2_P + \sigma^2_{PI}/n.I + \sigma^2_e/(n.I n.P))} The generalizability of a single time point across all items (Random time effects) is just \deqn{R_{1R} = \frac{\sigma^2_P + \sigma^2_{PI}/n.I}{\sigma^2_P + \sigma^2_{PI}/n.I + \sigma^2_T + \sigma^2_{PT}+ \sigma^2_e/(n.I)}}{R1R = (\sigma^2_P + \sigma^2_{PI}/n.I)/(\sigma^2_P + \sigma^2_{PI}/n.I + \sigma^2_T + \sigma^2_{PT} \sigma^2_e/n.I)} (Shrout and Lane equation 7 with a correction per Sean Lane.) Generalizability of average time points across all items (Random effects). (Shrout and Lane, equation 8) \deqn{R_{kR} = \frac{\sigma^2_P + \sigma^2_{PI}/n.I}{\sigma^2_P + \sigma^2_{PI}/n.I + \sigma^2_T/n.T + \sigma^2_{PT}/n.T+ \sigma^2_e/n.I}}{RkR = (\sigma^2_P + \sigma^2_{PI}/n.I)/(\sigma^2_P + \sigma^2_{PI}/n.I + \sigma^2_T/n.T + \sigma^2_{PT}/n.T + \sigma^2_e/(n.I n.T))} Generalizability of change scores (Shrout and Lane, equation 9) \deqn{R_{C} = \frac{\sigma^2_{PT}}{\sigma^2_{PT} + \sigma^2_e/n.I}}{RC = (\sigma^2_PT)/(\sigma^2_PT + \sigma^2_e/(n.I))}. If the design may be thought of as fully crossed, then either aov or lmer can be used to estimate the components of variance. With no missing data and a balanced design, these will give identical answers. However aov breaks down with missing data and seems to be very slow and very memory intensive for large problems ( 5,919 seconds for 209 cases with with 88 time points and three items on a Mac Powerbook with a 2.8 GHZ Intel Core I7). The slowdown probably is memory related, as the memory demands increased to 22.62 GB of compressed memory. lmer will handle this design but is not nearly as slow (242 seconds for the 209 cases with 88 time points and three items) as the aov approach. If the design is thought of as nested, rather than crossed, the components of variance are found using the \code{\link{lme}} function from nlme. This is very fast (114 cases with 88 time points and three items took 3.5 seconds). The nested design leads to the generalizability of K random effects Nested (Shrout and Lane, equation 10): \deqn{R_{KRN} = \frac{\sigma^2_P }{\sigma^2_P + \sigma^2_{T(P)}/n.I + \sigma^2_e/(n.I n.P}}{RKkRN = (\sigma^2_P)/(\sigma^2_P + \sigma^2_{T(P)}/n.p + \sigma^2_e/(n.I n.T))} And, finally, to the reliability of between person differences, averaged over items. (Shrout and Lane, equation 11). \deqn{R_{CN} = \frac{\sigma^2_{T(P)} }{\sigma^2_{T(P)} + \sigma^2_e/(n.I}}{RCN = (\sigma^2_T(P)/(\sigma^2_T(P) + \sigma^2_e/(n.I))} Unfortunately, when doing the nested analysis, \code{\link{lme}} will sometimes issue an obnoxious error about failing to converge. To fix this, turning off \code{\link{lme}} and just using lmer seems to solve the problem (i.e., set lme=FALSE and lmer=TRUE). (\code{\link{lme}} is part of core R and its namespace is automatically attached when loading \code{\link{psych}}). For many problems, lmer is not necessary and is thus not loaded. However sometimes it is useful. To use lmer it is necessary to have the lme4 package installed. It will be automatically loaded if it is installed and requested. In the interests of making a 'thin' package, lmer is suggested,not required. The input can either be in 'wide' or 'long' form. If in wide form, then specify the grouping variable, the 'time' variable, and the the column numbers or names of the items. (See the first example). If in long format, then what is the column (name or number) of the dependent variable. (See the second example.) \code{\link{mlArrange}} takes a wide data.frame and organizes it into a `long' data.frame suitable for a lattice \code{\link{xyplot}}. This is a convenient alternative to \code{\link{stack}}, particularly for unbalanced designs. The wide data frame is reorganized into a long data frame organized by grp (typically a subject id), by Time (typically a time varying variable, but can be anything, and then stacks the items within each person and time. Extra variables are carried over and matched to the appropriate grp and Time. Thus, if we have N subjects over t time points for k items, in wide format for N * t rows where each row has k items and e extra pieces of information, we get a N x t * k row by 4 + e column dataframe. The first four columns in the long output are id, time, values, and item names, the remaining columns are the extra values. These could be something such as a trait measure for each subject, or the situation in which the items are given. \code{\link{mlArrange}} plots k items over the t time dimensions for each subject. } \value{ \item{n.obs}{Number of individuals} \item{n.time}{Maximum number of time intervals} \item{n.items}{Number of items} \item{components}{Components of variance associated with individuals, Time, Items, and their interactions.} \item{RkF }{Reliability of average of all ratings across all items and times (fixed effects).} \item{R1R}{Generalizability of a single time point across all items (Random effects)} \item{RkR}{Generalizability of average time points across all items (Random effects)} \item{Rc}{Generalizability of change scores over time.} \item{RkRn}{ Generalizability of between person differences averaged over time and items} \item{Rcn}{Generalizability of within person variations averaged over items (nested structure)} \item{ANOVA}{The summary anova table from which the components are found (if done),} \item{s.lmer}{The summary of the lmer analysis (if done),} \item{s.lme}{The summary of the lme analysis (if done),} \item{alpha}{Within subject alpha over items and time.} \item{summary.by.person}{Summary table of ICCs organized by person,} \item{summary.by.time}{Summary table of ICCs organized by time.} \item{ICC.by.person}{A rather long list of ICCs by person.} \item{ICC.by.time}{Another long list of ICCs, this time for each time period,} \item{long}{The data (x) have been rearranged into long form for graphics or for further analyses using lme, lmer, or aov that require long form.} } \references{ Bolger, Niall and Laurenceau, Jean-Phillippe, (2013) Intensive longitudinal models. New York. Guilford Press. Cranford, J. A., Shrout, P. E., Iida, M., Rafaeli, E., Yip, T., & Bolger, N. (2006). A procedure for evaluating sensitivity to within-person change: Can mood measures in diary studies detect change reliably? Personality and Social Psychology Bulletin, 32(7), 917-929. Revelle, W. and Condon, D.M. (2019) Reliability from alpha to omega: A tutorial. Psychological Assessment, 31, 12, 1395-1411. https://doi.org/10.1037/pas0000754. \url{https://osf.io/preprints/psyarxiv/2y3w9} Preprint available from PsyArxiv Revelle, W. and Wilt, J. (2017) Analyzing dynamic data: a tutorial. Personality and Individual Differences. DOI: 10.1016/j.paid.2017.08.020 Shrout, Patrick and Lane, Sean P (2012), Psychometrics. In M.R. Mehl and T.S. Conner (eds) Handbook of research methods for studying daily life, (p 302-320) New York. Guilford Press } \author{William Revelle} \seealso{\code{\link{sim.multi}} and \code{\link{sim.multilevel}} to generate multilevel data, \code{\link{statsBy}} a for statistics for multi level analysis. } \examples{ #data from Shrout and Lane, 2012. shrout <- structure(list(Person = c(1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L), Time = c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L), Item1 = c(2L, 3L, 6L, 3L, 7L, 3L, 5L, 6L, 3L, 8L, 4L, 4L, 7L, 5L, 6L, 1L, 5L, 8L, 8L, 6L), Item2 = c(3L, 4L, 6L, 4L, 8L, 3L, 7L, 7L, 5L, 8L, 2L, 6L, 8L, 6L, 7L, 3L, 9L, 9L, 7L, 8L ), Item3 = c(6L, 4L, 5L, 3L, 7L, 4L, 7L, 8L, 9L, 9L, 5L, 7L, 9L, 7L, 8L, 4L, 7L, 9L, 9L, 6L)), .Names = c("Person", "Time", "Item1", "Item2", "Item3"), class = "data.frame", row.names = c(NA, -20L)) #make shrout super wide #Xwide <- reshape(shrout,v.names=c("Item1","Item2","Item3"),timevar="Time", #direction="wide",idvar="Person") #add more helpful Names #colnames(Xwide ) <- c("Person",c(paste0("Item",1:3,".T",1),paste0("Item",1:3,".T",2), #paste0("Item",1:3,".T",3),paste0("Item",1:3,".T",4))) #make superwide into normal form (i.e., just return it to the original shrout data #Xlong <-Xlong <- reshape(Xwide,idvar="Person",2:13) #Now use these data for a multilevel repliability study, use the normal wide form output mg <- mlr(shrout,grp="Person",Time="Time",items=3:5) #which is the same as #mg <- multilevel.reliability(shrout,grp="Person",Time="Time",items= # c("Item1","Item2","Item3"),plot=TRUE) #to show the lattice plot by subjects, set plot = TRUE #Alternatively for long input (returned in this case from the prior run) mlr(mg$long,grp="id",Time ="time",items="items", values="values",long=TRUE) #example of mlArrange #First, add two new columns to shrout and #then convert to long output using mlArrange total <- rowSums(shrout[3:5]) caseid <- rep(paste0("ID",1:5),4) new.shrout <- cbind(shrout,total=total,case=caseid) #now convert to long new.long <- mlArrange(new.shrout,grp="Person",Time="Time",items =3:5,extra=6:7) headTail(new.long,6,6) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{ multivariate }% use one of RShowDoc("KEYWORDS") \keyword{ models }% __ONLY ONE__ keyword per line psych/man/eigen.loadings.Rd0000644000176200001440000000301614374010402015323 0ustar liggesusers\name{eigen.loadings} \alias{eigen.loadings} \title{Convert eigen vectors and eigen values to the more normal (for psychologists) component loadings} \description{ The default procedures for principal component returns values not immediately equivalent to the loadings from a factor analysis. eigen.loadings translates them into the more typical metric of eigen vectors multiplied by the squareroot of the eigenvalues. This lets us find pseudo factor loadings if we have used princomp or eigen. \cr If we use \code{\link{principal}} to do our principal components analysis, then we do not need this routine.} \usage{ eigen.loadings(x) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{the output from eigen or a list of class princomp derived from princomp} } \value{ A matrix of Principal Component loadings more typical for what is expected in psychometrics. That is, they are scaled by the square root of the eigenvalues. } \author{ \email{ revelle@northwestern.edu } \cr \url{https://personality-project.org/revelle.html}} \note{Useful for SAPA analyses} \examples{ x <- eigen(Harman74.cor$cov) x$vectors[1:8,1:4] #as they appear from eigen y <- princomp(covmat=Harman74.cor$cov) y$loadings[1:8,1:4] #as they appear from princomp eigen.loadings(x)[1:8,1:4] # rescaled by the eigen values z <- pca(Harman74.cor$cov,4,rotate="none") z$loadings[1:8,1:4] #as they appear in pca } \keyword{ models }% at least one, from doc/KEYWORDS \keyword{ multivariate }% __ONLY ONE__ keyword per line psych/man/headtail.Rd0000644000176200001440000000470514426060076014230 0ustar liggesusers\name{headTail} \alias{headtail} \alias{headTail} \alias{topBottom} \alias{quickView} \title{Combine calls to head and tail} \description{A quick way to show the first and last n lines of a data.frame, matrix, or a text object. Just a pretty call to \code{\link{head}} and \code{\link{tail}} or \code{\link{View}} } \usage{ headTail(x, top=4,bottom=4,from=1,to=NULL, digits=2, hlength = 4, tlength =4, ellipsis=TRUE) headtail(x,hlength=4,tlength=4,digits=2,ellipsis=TRUE,from=1,to=NULL) topBottom(x, top=4,bottom=4,from=1,to=NULL, digits=2, hlength = 4, tlength = 4) quickView(x,top=8,bottom=8,from=1,to=NULL) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{ A matrix or data frame or free text} \item{top}{The number of lines at the beginning to show} \item{bottom}{The number of lines at the end to show} \item{digits}{Round off the data to digits} \item{ellipsis}{Separate the head and tail with dots (ellipsis)} \item{from}{The first column to show (defaults to 1)} \item{to}{The last column to show (defaults to the number of columns)} \item{hlength}{The number of lines at the beginning to show (an alias for top)} \item{tlength}{The number of lines at the end to show (an alias for bottom)} } \value{The first top and last bottom lines of a matrix or data frame with an ellipsis in between. If the input is neither a matrix nor data frame, the output will be the first top and last bottom lines. For each line, just columns starting at from and going to to will be displayed. Bt default, from = 1 and to = the last column. topBottom is just a call to headTail with ellipsis = FALSE and returning a matrix output. quickView is a call to \code{\link{View}} which opens a viewing window which is scrollable (if needed because the number of lines listed is more than a screen's worth). View (and therefore quickView) is slower than \code{\link{headTail}} or \code{\link{topBottom}}. } \seealso{ \code{\link{head}} and \code{\link{tail}}} \examples{ headTail(psychTools::iqitems,4,8,to=6) #the first 4 and last 6 items from 1 to 6 topBottom(psychTools::ability,from =2, to = 6) #the first and last 4 items from 2 to 6 headTail(psychTools::bfi,top=4, bottom=4,from =6,to=10) #the first and last 4 from 6 to 10 #not shown #quickView(ability,hlength=10,tlength=10) #this loads a spreadsheet like table } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{ multivariate } psych/man/bi.bars.Rd0000644000176200001440000000326713463344643014004 0ustar liggesusers\name{bi.bars} \alias{bi.bars} %- Also NEED an '\alias' for EACH other topic documented here. \title{Draw pairs of bargraphs based on two groups} \description{When showing e.g., age or education distributions for two groups, it is convenient to plot them back to back. bi.bars will do so.} \usage{bi.bars(x,var=NULL,grp=NULL,horiz,color,label=NULL,zero=FALSE,xlab,ylab,...) } \arguments{ \item{x}{The data frame or matrix from which we specify the data} \item{var}{The variable to plot} \item{grp}{a grouping variable.} \item{horiz}{horizontal (default) or vertical bars} \item{color}{colors for the two groups -- defaults to blue and red} \item{label}{If specified, labels for the dependent axis } \item{zero}{If TRUE, subtract the minimum value to make the numbers range from 0 to max -min. This is useful if showing heights} \item{xlab}{xaxis label if appropriate} \item{ylab}{y axis label otherwise} \item{\dots}{Further parameters to pass to the graphing program} } \details{A trivial, if useful, function to draw back to back histograms/barplots. One for each group.} \value{a graphic} \seealso{ \code{\link{describe}}, \code{\link{describeBy}} and \code{\link{statsBy}} for descriptive statistics and \code{\link{error.bars}} \code{\link{error.bars.by}} and \code{\link{densityBy}} \code{\link{violinBy}} for graphic displays } \author{William Revelle} \examples{ #data(bfi) bi.bars(psychTools::bfi,"age","gender" ,ylab="Age",main="Age by males and females") bi.bars(psychTools::bfi,"education","gender",xlab="Education", main="Education by gender",horiz=FALSE) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{ hplot } psych/man/circ.tests.Rd0000644000176200001440000001113013256544617014534 0ustar liggesusers\name{circ.tests} \alias{circ.tests} \title{ Apply four tests of circumplex versus simple structure } \description{Rotations of factor analysis and principal components analysis solutions typically try to represent correlation matrices as simple structured. An alternative structure, appealing to some, is a circumplex structure where the variables are uniformly spaced on the perimeter of a circle in a two dimensional space. Generating these data is straightforward, and is useful for exploring alternative solutions to affect and personality structure. } \usage{ circ.tests(loads, loading = TRUE, sorting = TRUE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{loads}{ A matrix of loadings \code{loads} here } \item{loading}{ Are these loadings or a correlation matrix \code{loading} } \item{sorting}{ Should the variables be sorted \code{sorting} } } \details{``A common model for representing psychological data is simple structure (Thurstone, 1947). According to one common interpretation, data are simple structured when items or scales have non-zero factor loadings on one and only one factor (Revelle & Rocklin, 1979). Despite the commonplace application of simple structure, some psychological models are defined by a lack of simple structure. Circumplexes (Guttman, 1954) are one kind of model in which simple structure is lacking. ``A number of elementary requirements can be teased out of the idea of circumplex structure. First, circumplex structure implies minimally that variables are interrelated; random noise does not a circumplex make. Second, circumplex structure implies that the domain in question is optimally represented by two and only two dimensions. Third, circumplex structure implies that variables do not group or clump along the two axes, as in simple structure, but rather that there are always interstitial variables between any orthogonal pair of axes (Saucier, 1992). In the ideal case, this quality will be reflected in equal spacing of variables along the circumference of the circle (Gurtman, 1994; Wiggins, Steiger, & Gaelick, 1981). Fourth, circumplex structure implies that variables have a constant radius from the center of the circle, which implies that all variables have equal communality on the two circumplex dimensions (Fisher, 1997; Gurtman, 1994). Fifth, circumplex structure implies that all rotations are equally good representations of the domain (Conte & Plutchik, 1981; Larsen & Diener, 1992). (Acton and Revelle, 2004) Acton and Revelle reviewed the effectiveness of 10 tests of circumplex structure and found that four did a particularly good job of discriminating circumplex structure from simple structure, or circumplexes from ellipsoidal structures. Unfortunately, their work was done in Pascal and is not easily available. Here we release R code to do the four most useful tests: 1 The Gap test of equal spacing 2 Fisher's test of equality of axes 3 A test of indifference to Rotation 4 A test of equal Variance of squared factor loadings across arbitrary rotations. To interpret the values of these various tests, it is useful to compare the particular solution to simulated solutions representing pure cases of circumplex and simple structure. See the example output from \code{\link{circ.simulation}} and compare these plots with the results of the circ.test. } \value{A list of four items is returned. These are the gap, fisher, rotation and variance test results. \item{gaps}{gap.test} \item{fisher}{fisher.test} \item{RT}{rotation.test} \item{VT}{variance.test} } \references{ Acton, G. S. and Revelle, W. (2004) Evaluation of Ten Psychometric Criteria for Circumplex Structure. Methods of Psychological Research Online, Vol. 9, No. 1 \url{https://personality-project.org/revelle/publications/acton.revelle.mpr110_10.pdf} } \author{ William Revelle} \note{ Of the 10 criterion discussed in Acton and Revelle (2004), these tests operationalize the four most useful. } \seealso{To understand the results of the circ.tests it it best to compare it to simulated values. Thus, see \code{\link{circ.simulation}}, \code{\link{sim.circ}} } \examples{ circ.data <- circ.sim(24,500) circ.fa <- fa(circ.data,2) plot(circ.fa,title="Circumplex Structure") ct <- circ.tests(circ.fa) #compare with non-circumplex data simp.data <- item.sim(24,500) simp.fa <- fa(simp.data,2) plot(simp.fa,title="Simple Structure") st <- circ.tests(simp.fa) res <- rbind(ct[1:4],st[1:4]) rownames(res) <- c("circumplex","Simple") print(res,digits=2) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{ multivariate } \keyword{models} psych/man/paired.r.Rd0000644000176200001440000000457112015460354014155 0ustar liggesusers\name{paired.r} \alias{paired.r} \title{ Test the difference between (un)paired correlations } \description{ Test the difference between two (paired or unpaired) correlations. Given 3 variables, x, y, z, is the correlation between xy different than that between xz? If y and z are independent, this is a simple t-test of the z transformed rs. But, if they are dependent, it is a bit more complicated. } \usage{ paired.r(xy, xz, yz=NULL, n, n2=NULL,twotailed=TRUE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{xy}{r(xy) } \item{xz}{r(xz) } \item{yz}{r(yz) } \item{n}{Number of subjects for first group} \item{n2}{Number of subjects in second group (if not equal to n)} \item{twotailed}{Calculate two or one tailed probability values} } \details{To find the z of the difference between two independent correlations, first convert them to z scores using the Fisher r-z transform and then find the z of the difference between the two correlations. The default assumption is that the group sizes are the same, but the test can be done for different size groups by specifying n2. If the correlations are not independent (i.e., they are from the same sample) then the correlation with the third variable r(yz) must be specified. Find a t statistic for the difference of thee two dependent correlations. } \value{a list containing the calculated t or z values and the associated two (or one) tailed probability. \item{t}{t test of the difference between two dependent correlations} \item{p}{probability of the t or of the z} \item{z}{z test of the difference between two independent correlations} } \seealso{\code{\link{r.test}} for more tests of independent as well as dependent (paired) tests. \code{\link{p.rep.r}} for the probability of replicating a particular correlation. \code{\link{cor.test}} from stats for testing a single correlation and \code{\link{corr.test}} for finding the values and probabilities of multiple correlations. See also \code{\link{set.cor}} to do multiple correlations from matrix input.} \author{ William Revelle} \examples{ paired.r(.5,.3, .4, 100) #dependent correlations paired.r(.5,.3,NULL,100) #independent correlations same sample size paired.r(.5,.3,NULL, 100, 64) # independent correlations, different sample sizes } \keyword{ multivariate }% at least one, from doc/KEYWORDS \keyword{ models }% __ONLY ONE__ keyword per line psych/man/Gleser.Rd0000644000176200001440000000426512262552335013677 0ustar liggesusers\name{Gleser} \alias{Gleser} \docType{data} \title{ Example data from Gleser, Cronbach and Rajaratnam (1965) to show basic principles of generalizability theory. } \description{ Gleser, Cronbach and Rajaratnam (1965) discuss the estimation of variance components and their ratios as part of their introduction to generalizability theory. This is a adaptation of their "illustrative data for a completely matched G study" (Table 3). 12 patients are rated on 6 symptoms by two judges. Components of variance are derived from the ANOVA. } \usage{data(Gleser)} \format{ A data frame with 12 observations on the following 12 variables. J item by judge: \describe{ \item{\code{J11}}{a numeric vector} \item{\code{J12}}{a numeric vector} \item{\code{J21}}{a numeric vector} \item{\code{J22}}{a numeric vector} \item{\code{J31}}{a numeric vector} \item{\code{J32}}{a numeric vector} \item{\code{J41}}{a numeric vector} \item{\code{J42}}{a numeric vector} \item{\code{J51}}{a numeric vector} \item{\code{J52}}{a numeric vector} \item{\code{J61}}{a numeric vector} \item{\code{J62}}{a numeric vector} } } \details{ Generalizability theory is the application of a components of variance approach to the analysis of reliability. Given a G study (generalizability) the components are estimated and then may be used in a D study (Decision). Different ratios are formed as appropriate for the particular D study. } \source{ Gleser, G., Cronbach, L., and Rajaratnam, N. (1965). Generalizability of scores influenced by multiple sources of variance. Psychometrika, 30(4):395-418. (Table 3, rearranged to show increasing patient severity and increasing item severity. } \references{ Gleser, G., Cronbach, L., and Rajaratnam, N. (1965). Generalizability of scores influenced by multiple sources of variance. Psychometrika, 30(4):395-418. } \examples{ #Find the MS for each component: #First, stack the data data(Gleser) stack.g <- stack(Gleser) st.gc.df <- data.frame(stack.g,Persons=rep(letters[1:12],12), Items=rep(letters[1:6],each=24),Judges=rep(letters[1:2],each=12)) #now do the ANOVA anov <- aov(values ~ (Persons*Judges*Items),data=st.gc.df) summary(anov) } \keyword{datasets} psych/man/partial.r.Rd0000644000176200001440000001120414533734303014341 0ustar liggesusers\name{partial.r} \alias{partial.r} \title{ Find the partial correlations for a set (x) of variables with set (y) removed. } \description{A straightforward application of matrix algebra to remove the effect of the variables in the y set from the x set. Input may be either a data matrix or a correlation matrix. Variables in x and y are specified by location. If x and y are not specified, then the effect of all variables are partialled from all the other correlations. May also be done using formula input which is more convenient when comparing results to regression models. Also has the option to find part (aka semi-partial) correlations. } \usage{ partial.r(data, x, y, use="pairwise",method="pearson",part=FALSE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{data}{A data or correlation matrix} \item{x}{The variable names or locations associated with the X set (or formula input) } \item{y}{The variable names or locations associated with the Y set to be partialled from the X set} \item{use}{How should we treat missing data? The default is pairwise complete.} \item{method}{Which method of correlation should we use, the default is pearson.} \item{part}{Find the part correlation (aka semi-partial) , defaults to finding partial correlations} } \details{There are two ways to use \code{\link{partial.r}}. One is to find the complete partial correlation matrix (that is, partial all the other variables out of each variable). This may be done by simply specifying the raw data or correlation matrix. (In the case of raw data, correlations will be found according to use and method.) In this case, just specify the data matrix. Alternatively, if we think of the data as an X matrix and Y matrix, then (D = X + Y) with correlations R. Then the partial correlations of the X predictors with the Y variables partialled out are just the last column of R^(-1). See the \code{\link{Tal.Or}} example below. The second usage is to partial a set of variables(y) out of another set (x). It is sometimes convenient to partial the effect of a number of variables (e.g., sex, age, education) out of the correlations of another set of variables. This could be done laboriously by finding the residuals of various multiple correlations, and then correlating these residuals. The matrix algebra alternative is to do it directly. To find the confidence intervals and "significance" of the correlations, use the \code{\link{corr.p}} function with n = n - s where s is the number of covariates. A perhaps easier format is to use formula input compatible with that used in \code{\link{lmCor}}. If using formula input,we specify X and Y with the partialled variables specified by subtraction. That is X ~ Y - z, This is useful in the case of multiple regression using which uses this notation. Following a thoughtful request from Fransisco Wilheim, I just find the correlations of the variables specified in the call (previously I had found the entire correlation matrix, which is a waste of time and breaks if some variables are non-numeric).) In the case of non-positive definite matrices, find the Pinv (pseudo inverse) of the matrix. } \value{The matrix of partial correlations.} \references{ Revelle, W. (in prep) An introduction to psychometric theory with applications in R. To be published by Springer. (working draft available at \url{https://personality-project.org/r/book/} } \author{ William Revelle } \seealso{ \code{\link{lmCor}} for a similar application for regression. \code{\link{lowerMat}} to neatly show a correlation matrix, and \code{\link{corr.p}} to find the confidence intervals of a correlation. } \examples{ jen <- make.hierarchical() #make up a correlation matrix lowerMat(jen[1:5,1:5]) par.r <- partial.r(jen,c(1,3,5),c(2,4)) lowerMat(par.r) #or R <- jen[1:5,1:5] par.r <- partial.r(R, y = cs(V2,V4)) lowerMat(par.r) cp <- corr.p(par.r,n=98) #assumes the jen data based upon n =100. print(cp,short=FALSE) #show the confidence intervals as well #partial all from all correlations. lowerMat(partial.r(jen)) #Consider the Tal.Or data set. lowerCor(Tal.Or) #partial gender and age from these relations (they hardly change) partial.r(Tal.Or,1:4,cs(gender,age)) #find the partial correlations between the first three variables and the DV (reaction) round(partial.r(Tal.Or[1:4])[4,1:3],2) #The partial correlations with the criterion #Consider the eminence data set from Del Giudice. if(require("psychTools")) { data(eminence) partial.r(reputation ~ works + citations - birth.year, data=eminence) #now do a part correlation partial.r(reputation ~ works + citations - birth.year, data=eminence, part=TRUE) } } \keyword{multivariate} psych/man/unidim.Rd0000644000176200001440000001260614503600243013731 0ustar liggesusers\name{unidim} \alias{unidim} \title{Several indices of the unidimensionality of a set of variables.} \description{There are a variety of ways of assessing whether a set of items measures one latent trait. \code{\link{unidim}} is just one more way. If a one factor model holds in the data, then the factor analytic decomposition F implies that FF' should reproduce the correlations with communalities along the diagonal. In this case, the fit FF' should be identical to the correlation matrix minus the uniquenesses. unidim is just the ratio of these two estimates. The higher it is, the more the evidence for unidimensionality. A number of alternative statistics are estimated. } \usage{ unidim(keys=NULL,x=NULL,cor="cor",correct=.5, check.keys=TRUE) } \arguments{ \item{x}{An input matrix or data frame. If x is not a correlation matrix, then the correlations are found.} \item{keys}{If specified, then a number of scales can be tested at once. (See \code{\link{scoreItems}} for a similar procedure.)} \item{cor}{By default, find the Pearson correlation, other options are "spearman","kendall","tet"(for tetrachoric), "poly" (for polychoric), or "mixed"} \item{correct}{If using "tetrachoric" or "polychoric" correlations, should we correct empty cells for continuity, and if so, by how much. (See \code{\link{tetrachoric}} for a discussion of this correction)} \item{check.keys}{If TRUE, then items will be keyed based upon their loadings on the first factor. Automatically done if key.list is NULL.} } \details{ This is set of new indices to test the unidimensionality of scale. A number of test cases suggest that u provides high values when the data are in fact unidimensional, low values when they are not. The logic is deceptively simple: Unidimensionality implies that a one factor model of the data fits the covariances of the data. If this is the case, then factor model implies R = FF' + U2 will have residuals of 0. Similarly, this also implies that the observed correlations will equal the model. Thus, the sum of the observed correlations (with the diagonal replaced by the communalities) should match the factor model. Compare these two models: R - U2 versus FF'. This is the rho_c estimate. It is basically a test of whether a congeneric model fits. (That is, all the items have loadings on just one factor.) This works well, but when some of the loadings are very small, even though 1 factor is correct, it is probably not a good idea to think of the items as forming a unidimen- sional scale. Thus, an alternative model (the Tau statistic) considers the residuals found by subtracting the average correlation from the observed correlations. This will achieve a maximum if the item covariances are all identical (a tau equivalent model). The product of rho_c and Tau is the measure of unidimensionality, u That is, congeneric fit x tau equivalent fit as a measure of unidimensionality. All of the results are reported in the uni list. } \value{ \item{u}{The estimate of unidimensionality which is just the product of} \item{Tau}{The fit of the average r to the correlation matrix} \item{rho_c}{The off diagonal fit from \code{\link{fa}} } \item{alpha}{Standardized alpha of the keyed items (after appropriate reversals)} \item{av.r}{The average interitem correlation of the keyed items.} \item{median.r}{The median value of the iteritem correlations of the keyed items.} \item{Unidim.A}{The unidimensional criterion when items are keyed in positive direction.} \item{Unidim }{The raw value of the unidimensional criterion} \item{raw.model}{The ratio of the FF' model to the sum(R)} \item{adj.model}{The ratio of the FF' model to the sum(R) when items are flipped.} \item{Total}{The ratio of the sum(R - uniqueness)/sum(R)} \item{Total.A}{Same ratio with flipped items} } \references{ Revelle, W. and Condon, D.M (2023) Using undim rather than omega in estimating undimensionality. Working draft available at \url{https://personality-project.org/revelle/publications/rc.23.pdf} } \author{William Revelle} \note{A perhaps interesting idea but still underdevelopment. Treat with appropriate caution. It is (perhaps) useful to compare the unidim statistics with those generated by \code{\link{omega}}. A quick way to do this is to use the \code{\link{reliability}} function which will find alpha, omega_h and omega_t as well as split half reliablities and the unidim measures. When this done, it can be seen that u is not sensitive to the number of items and seems robust across various sample sizes. } \seealso{ \code{\link{fa}} for factor analysis, \code{\link{omega}} and \code{\link{reliability}} for reliability. } \examples{ #test the unidimensionality of the five factors of the bfi data set. unidim(psychTools::bfi.keys,psychTools::bfi) unidim(psychTools::ability.keys,psychTools::ability) #Try a known 3 factor structure x <- sim.minor(nfact=3,bipolar=FALSE) #this makes all the items positive unidim(x$model) keys.list <- list(first =c(1:4),second = 5:8,third=9:12,all=1:12) unidim(keys.list, x$model) x <- sim.minor(nfact=3) unidim(keys.list,x$mode) #we flip the negative items #what about a hierarchical model? H <- sim.hierarchical() # by default, a nice hierarchical model H.keys <- list(First = paste0("V",1:3),Second=paste0("V",4:6),Third=paste0("V",7:9), All = paste0("V",1:9)) unidim(H.keys,H) } \keyword{ models }% at least one, from doc/KEYWORDS \keyword{multivariate }% __ONLY ONE__ keyword per linepsych/man/glb.algebraic.Rd0000644000176200001440000001353513256544637015144 0ustar liggesusers\name{glb.algebraic} \alias{glb.algebraic} %- Also NEED an '\alias' for EACH other topic documented here. \title{Find the greatest lower bound to reliability. } \description{ The greatest lower bound solves the ``educational testing problem". That is, what is the reliability of a test? (See \code{\link{guttman}} for a discussion of the problem). Although there are many estimates of a test reliability (Guttman, 1945) most underestimate the true reliability of a test. For a given covariance matrix of items, C, the function finds the greatest lower bound to reliability of the total score using the csdp function from the Rcsdp package.} \usage{ glb.algebraic(Cov, LoBounds = NULL, UpBounds = NULL) } \arguments{ \item{Cov}{A p * p covariance matrix. Positive definiteness is not checked.} \item{LoBounds}{A vector \eqn{l =(l_1, \dots, l_p)}{L = (l1 ... lp)} of length p with lower bounds to the diagonal elements \eqn{x_i}. The default l=(0, . . . , 0) does not imply any constraint, because positive semidefiniteness of the matrix \eqn{\tilde{ C} + Diag(x)}{C0 + Diag(x)} implies \eqn{0 \leq x_i}{0 \le xi.}} \item{UpBounds}{A vector u =(u1, . . . , up) of length p with upper bounds to the diagonal elements xi. The default is u = v.} } \details{ If C is a p * p-covariance matrix, v = diag(C) its diagonal (i. e. the vector of variances \eqn{v_i = c_{ii}}), \eqn{\tilde { C} = C - Diag(v)}{C0 = C - Diag(v)} is the covariance matrix with 0s substituted in the diagonal and x = the vector \eqn{x_1, \dots ,x_n}{(x1, . . . , xp)} the educational testing problem is (see e. g., Al-Homidan 2008) \deqn{\sum_{i=1}^p x_i \rightarrow \min}{(Sum i = 1 to p xi) -> min } s.t. \deqn{\tilde{ C} + Diag(x) \geq 0}{C0 + Diag(x) >= 0}(i.e. positive semidefinite) and \eqn{x_i \leq v_i, i=1,\dots,p}{xi \le vi, i = 1 ..., p}. This is the same as minimizing the trace of the symmetric matrix \deqn{\tilde{ C}+diag(x)=\left(\begin{array}{llll} x_1 & c_{12} & \ldots & c_{1p} \\ c_{12} & x_2 & \ldots & c_{2p} \\ \vdots & \vdots & \ddots & \vdots \\ c_{1p} & c_{2p} & \ldots & x_p\\ \end{array}\right)}{C0 + Diag(x)} s. t. \eqn{\tilde{ C} + Diag(x)}{C0 + Diag(x)} is positive semidefinite and \eqn{x_i \leq v_i}{xi \le vi}. The greatest lower bound to reliability is \deqn{\frac{\sum_{ij} \bar{c_{ij}} + \sum_i x_i}{\sum_{ij}c_{ij}}}{ (sum cij (i \ne j) + sum xi )/ sum cij} Additionally, function glb.algebraic allows the user to change the upper bounds \eqn{x_i \leq v_i}{xi \le vi} to \eqn{x_i \leq u_i}{xi \le ui} and add lower bounds \eqn{l_i \leq x_i}{li \le xi}. The greatest lower bound to reliability is applicable for tests with non-homogeneous items. It gives a sharp lower bound to the reliability of the total test score. Caution: Though glb.algebraic gives exact lower bounds for exact covariance matrices, the estimates from empirical matrices may be strongly biased upwards for small and medium sample sizes. glb.algebraic is wrapper for a call to function csdp of package Rcsdp (see its documentation). If Cov is the covariance matrix of subtests/items with known lower bounds, rel, to their reliabilities (e. g. Cronbachs \eqn{\alpha}), LoBounds can be used to improve the lower bound to reliability by setting LoBounds <- rel*diag(Cov). Changing UpBounds can be used to relax constraints \eqn{x_i \leq v_i}{xi \le vi} or to fix \eqn{x_i}{xi}-values by setting LoBounds[i] < -z; UpBounds[i] <- z. } \value{ \item{glb }{The algebraic greatest lower bound} \item{solution}{The vector x of the solution of the semidefinite program. These are the elements on the diagonal of C.} \item{status}{Status of the solution. See documentation of csdp in package Rcsdp. If status is 2 or greater or equal than 4, no glb and solution is returned. If status is not 0, a warning message is generated.} \item{Call}{The calling string} } \references{Al-Homidan S (2008). Semidefinite programming for the educational testing problem. Central European Journal of Operations Research, 16:239-249. Bentler PM (1972) A lower-bound method for the dimension-free measurement of internal consistency. Soc Sci Res 1:343-357. Fletcher R (1981) A nonlinear programming problem in statistics (educational testing). SIAM J Sci Stat Comput 2:257-267. Shapiro A, ten Berge JMF (2000). The asymptotic bias of minimum trace factor analysis, with applications to the greatest lower bound to reliability. Psychometrika, 65:413-425. ten Berge, Socan G (2004). The greatest bound to reliability of a test and the hypothesis of unidimensionality. Psychometrika, 69:613-625. } \author{Andreas Moltner \cr Center of Excellence for Assessment in Medicine/Baden-Wurttemberg\cr University of Heidelberg\cr William Revelle\cr Department of Psychology \cr Northwestern University Evanston, Illiniois \cr https://personality-project.org/revelle.html } \seealso{ For an alternative estimate of the greatest lower bound, see \code{\link{glb.fa}}. For multiple estimates of reliablity, see \code{\link{guttman}} } \examples{ Cv<-matrix(c(215, 64, 33, 22, 64, 97, 57, 25, 33, 57,103, 36, 22, 25, 36, 77),ncol=4) Cv # covariance matrix of a test with 4 subtests Cr<-cov2cor(Cv) # Correlation matrix of tests if(!require(Rcsdp)) {print("Rcsdp must be installed to find the glb.algebraic")} else { glb.algebraic(Cv) # glb of total score glb.algebraic(Cr) # glb of sum of standardized scores w<-c(1,2,2,1) # glb of weighted total score glb.algebraic(diag(w) \%*\% Cv \%*\% diag(w)) alphas <- c(0.8,0,0,0) # Internal consistency of first test is known glb.algebraic(Cv,LoBounds=alphas*diag(Cv)) # Fix all diagonal elements to 1 but the first: lb <- glb.algebraic(Cr,LoBounds=c(0,1,1,1),UpBounds=c(1,1,1,1)) lb$solution[1] # should be the same as the squared mult. corr. smc(Cr)[1] } } \keyword{multivariate} psych/man/comorbidity.Rd0000644000176200001440000000330713211415171014765 0ustar liggesusers\name{comorbidity} \alias{comorbidity} \title{ Convert base rates of two diagnoses and their comorbidity into phi, Yule, and tetrachorics } \description{In medicine and clinical psychology, diagnoses tend to be categorical (someone is depressed or not, someone has an anxiety disorder or not). Cooccurrence of both of these symptoms is called comorbidity. Diagnostic categories vary in their degree of comorbidity with other diagnostic categories. From the point of view of correlation, comorbidity is just a name applied to one cell in a four fold table. It is thus possible to analyze comorbidity rates by considering the probability of the separate diagnoses and the probability of the joint diagnosis. This gives the two by two table needed for a phi, Yule, or tetrachoric correlation. } \usage{ comorbidity(d1, d2, com, labels = NULL) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{d1}{Proportion of diagnostic category 1} \item{d2}{Proportion of diganostic category 2 } \item{com}{Proportion of comorbidity (diagnostic category 1 and 2) } \item{labels}{Names of categories 1 and 2} } \value{ \item{twobytwo }{The two by two table implied by the input } \item{phi }{Phi coefficient of the two by two table} \item{Yule}{Yule coefficient of the two by two table} \item{tetra}{Tetrachoric coefficient of the two by two table} } \author{William Revelle } \seealso{ \code{\link{phi}}, \code{\link{phi2tetra}} ,\code{\link{Yule}}, \code{\link{Yule.inv}} \code{\link{Yule2phi}}, \code{\link{tetrachoric}} and \code{\link{polychoric}}, as well as \code{\link{AUC}} for graphical displays} \examples{ comorbidity(.2,.15,.1,c("Anxiety","Depression")) } \keyword{multivariate } psych/man/corr.test.Rd0000644000176200001440000001737514527735605014420 0ustar liggesusers\name{corTest} \alias{corTest} \alias{corr.test} \alias{corr.p} \title{Find the correlations, sample sizes, and probability values between elements of a matrix or data.frame. } \description{Although the cor function finds the correlations for a matrix, it does not report probability values. cor.test does, but for only one pair of variables at a time. corr.test uses cor to find the correlations for either complete or pairwise data and reports the sample sizes and probability values as well. For symmetric matrices, raw probabilites are reported below the diagonal and correlations adjusted for multiple comparisons above the diagonal. In the case of different x and ys, the default is to adjust the probabilities for multiple tests. Both corr.test and corr.p return raw and adjusted confidence intervals for each correlation. } \usage{ corTest(x, y = NULL, use = "pairwise",method="pearson",adjust="holm", alpha=.05,ci=TRUE,minlength=5,normal=TRUE) corr.test(x, y = NULL, use = "pairwise",method="pearson",adjust="holm", alpha=.05,ci=TRUE,minlength=5,normal=TRUE) corr.p(r,n,adjust="holm",alpha=.05,minlength=5,ci=TRUE) } \arguments{ \item{x}{A matrix or dataframe } \item{y}{A second matrix or dataframe with the same number of rows as x } \item{use}{use="pairwise" is the default value and will do pairwise deletion of cases. use="complete" will select just complete cases. } \item{method}{method="pearson" is the default value. The alternatives to be passed to cor are "spearman" and "kendall". These last two are much slower, particularly for big data sets. } \item{adjust}{What adjustment for multiple tests should be used? ("holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none"). See \code{\link{p.adjust}} for details about why to use "holm" rather than "bonferroni"). } \item{alpha}{alpha level of confidence intervals} \item{r}{A correlation matrix} \item{n}{Number of observations if using corr.p. May be either a matrix (as returned from corr.test, or a scaler. Set to n - np if finding the significance of partial correlations. (See below). } \item{ci}{By default, confidence intervals are found. However, this leads to a noticable slowdown of speed, particularly for large problems. So, for just the rs, ts and ps, set ci=FALSE} \item{minlength}{What is the minimum length for abbreviations. Defaults to 5.} \item{normal}{By default, probabilities for method="spearman" and method="kendall" are found by normal theory. If normal=="FALSE", then repetitive calls are made to cor.test. This is much slower, but gives more accurate p values. exact is set to be FALSE which means that exact p values for small samples are not found given the problem of ties. } } \details{corr.test uses the \code{\link{cor}} function to find the correlations, and then applies a t-test to the individual correlations using the formula \deqn{t = \frac{r * \sqrt(n-2)}{\sqrt(1-r^2)} }{t = r* sqrt(n-2)/sqrt(1-r^2) } \deqn{se = \sqrt(\frac{1-r^2}{n-2}) }{se = sqrt((1-r^2)/(n-2))} The t and Standard Errors are returned as objects in the result, but are not normally displayed. Confidence intervals are found and printed if using the print(short=FALSE) option. These are found by using the fisher z transform of the correlation and then taking the range r +/- qnorm(alpha/2) * se and the standard error of the z transforms is \deqn{se = \sqrt(\frac {1}{n-3}) }{se = sqrt(1/(n-3))}. These values are then back transformed to be in correlation units. They are returned in the ci object. Note that in the case of method=="kendall" since these are the normal theory confidence intervals they are slightly too big. The probability values may be adjusted using the Holm (or other) correction. If the matrix is symmetric (no y data), then the original p values are reported below the diagonal and the adjusted above the diagonal. Otherwise, all probabilities are adjusted (unless adjust="none"). This is made explicit in the output. Confidence intervals are shown for raw and adjusted probabilities in the ci object. For those who like the conventional use of "magic asterisks" to show (stars) to represent conventional levels of significance, the object stars is returned (but not shown)). See the examples. \code{\link{corr.p}} may be applied to the results of \code{\link{partial.r}} if n is set to n - s (where s is the number of variables partialed out) Fisher, 1924. } \value{ \item{r}{The matrix of correlations} \item{n}{Number of cases per correlation} \item{t}{value of t-test for each correlation} \item{p}{two tailed probability of t for each correlation. For symmetric matrices, p values adjusted for multiple tests are reported above the diagonal. } \item{se}{standard error of the correlation} \item{ci}{the alpha/2 lower and upper values.} \item{ci2}{ci but with the adjusted pvalues as well. This was added after tests showed we were breaking some packages that were calling the ci object without bothering to check for its dimensions.} \item{ci.adj}{These are the adjusted ((Holm or Bonferroni) confidence intervals. If asking to not adjust, the Holm adjustments for the confidence intervals are shown anyway, but the probability values are not adjusted and the appropriate confidence intervals are shown in the ci object. } \item{stars}{For those people who like to show magic asterisks denoting ``statistical significance" the stars object flags those correlation values that are unlikely given normal theory. See the last example for how to print these neatly.} } \note{For very large matrices (> 200 x 200), there is a noticeable speed improvement if confidence intervals are not found. That adjusted confidence intervals are shown even when asking for no adjustment might be confusing. If you don't want adjusted intervals, just use the ci object. The adjusted values are given in the ci.adj object. } \seealso{ \code{\link{cor.test}} for tests of a single correlation, Hmisc::rcorr for an equivalant function, \code{\link{r.test}} to test the difference between correlations, and \code{\link{cortest.mat}} to test for equality of two correlation matrices. Also see \code{\link{cor.ci}} for bootstrapped confidence intervals of Pearson, Spearman, Kendall, tetrachoric or polychoric correlations. In addition \code{\link{cor.ci}} will find bootstrapped estimates of composite scales based upon a set of correlations (ala \code{\link{cluster.cor}}). In particular, see \code{\link{p.adjust}} for a discussion of p values associated with multiple tests. Other useful functions related to finding and displaying correlations include \code{link{corPlot}} to graphically display the correlation matrix, and \code{\link{lowerCor}} for finding the correlations and then displaying the lower off diagonal using the \code{\link{lowerMat}} function. \code{\link{lowerUpper}} to compare two correlation matrices. Also see \code{\link{pairs.panels}} to show the correlations and scatter plots. } \examples{ ct <- corTest(attitude) #ct <- corr.test(attitude) #find the correlations and give the probabilities ct #show the results cts <- corr.test(attitude[1:3],attitude[4:6]) #reports all values corrected for multiple tests #corr.test(sat.act[1:3],sat.act[4:6],adjust="none") #don't adjust the probabilities #take correlations and show the probabilities as well as the confidence intervals print(corr.p(cts$r,n=30),short=FALSE) #don't adjust the probabilities print(corr.test(sat.act[1:3],sat.act[4:6],adjust="none"),short=FALSE) #print out the stars object without showing quotes print(corr.test(attitude)$stars,quote=FALSE) #note that the adjusted ps are given as well kendall.r <- corr.test(bfi[1:40,4:6], method="kendall", normal=FALSE) #compare with cor.test(x=bfi[1:40,4],y=bfi[1:40,6],method="kendall", exact=FALSE) print(kendall.r,digits=6) } \keyword{multivariate } \keyword{ models } psych/man/sat.act.Rd0000644000176200001440000000341513715124577014016 0ustar liggesusers\name{sat.act} \alias{sat.act} \docType{data} \title{3 Measures of ability: SATV, SATQ, ACT} \description{Self reported scores on the SAT Verbal, SAT Quantitative and ACT were collected as part of the Synthetic Aperture Personality Assessment (SAPA) web based personality assessment project. Age, gender, and education are also reported. The data from 700 subjects are included here as a demonstration set for correlation and analysis. } \usage{data(sat.act)} \format{ A data frame with 700 observations on the following 6 variables. \describe{ \item{\code{gender}}{males = 1, females = 2} \item{\code{education}}{self reported education 1 = high school ... 5 = graduate work} \item{\code{age}}{age} \item{\code{ACT}}{ACT composite scores may range from 1 - 36. National norms have a mean of 20. } \item{\code{SATV}}{SAT Verbal scores may range from 200 - 800. } \item{\code{SATQ}}{SAT Quantitative scores may range from 200 - 800} } } \details{hese items were collected as part of the SAPA project (\url{https://www.sapa-project.org/})to develop online measures of ability (Revelle, Wilt and Rosenthal, 2009). The score means are higher than national norms suggesting both self selection for people taking on line personality and ability tests and a self reporting bias in scores. See also the iq.items data set. } \source{\url{https://personality-project.org/} } \references{Revelle, William, Wilt, Joshua, and Rosenthal, Allen (2009) Personality and Cognition: The Personality-Cognition Link. In Gruszka, Alexandra and Matthews, Gerald and Szymura, Blazej (Eds.) Handbook of Individual Differences in Cognition: Attention, Memory and Executive Control, Springer. } \examples{ data(sat.act) describe(sat.act) pairs.panels(sat.act) } \keyword{datasets} psych/man/bock.table.Rd0000644000176200001440000000422312456772073014464 0ustar liggesusers\name{bock} \alias{bock} \alias{bock.table} \alias{lsat6} \alias{lsat7} \alias{bock.lsat} \docType{data} \title{Bock and Liberman (1970) data set of 1000 observations of the LSAT } \description{An example data set used by McDonald (1999) as well as other discussions of Item Response Theory makes use of a data table on 10 items (two sets of 5) from the Law School Admissions Test (LSAT). Included in this data set is the original table as well as the reponses for 1000 subjects on the first set (Figure Classification) and second set (Debate). } \usage{data(bock)} \format{ A data frame with 32 observations on the following 8 variables. \describe{ \item{\code{index}}{32 response patterns} \item{\code{Q1}}{Responses to item 1} \item{\code{Q2}}{Responses to item 2} \item{\code{Q3}}{Responses to item 3} \item{\code{Q4}}{Responses to item 4} \item{\code{Q5}}{Responses to item 5} \item{\code{Ob6}}{count of observations for the section 6 test} \item{\code{Ob7}}{count of observations for the section 7 test} } Two other data sets are derived from the bock dataset. These are converted using the \code{\link{table2df}} function. \describe{ \item{lsat6}{reponses to 5 items for 1000 subjects on section 6} \item{lsat7}{reponses to 5 items for 1000 subjects on section 7} } } \details{The lsat6 data set is analyzed in the ltm package as well as by McDonald (1999). lsat7 is another 1000 subjects on part 7 of the LSAT. Both sets are described by Bock and Lieberman (1970). Both sets are useful examples of testing out IRT procedures and showing the use of \code{\link{tetrachoric}} correlations and item factor analysis using the \code{\link{irt.fa}} function. } \source{ R. Darrell Bock and M. Lieberman (1970). Fitting a response model for dichotomously scored items. Psychometrika, 35(2):179-197. } \references{ R.P. McDonald. Test theory: A unified treatment. L. Erlbaum Associates, Mahwah, N.J., 1999. } \examples{ data(bock) responses <- table2df(bock.table[,2:6],count=bock.table[,7], labs= paste("lsat6.",1:5,sep="")) describe(responses) ## maybe str(bock.table) ; plot(bock.table) ... } \keyword{datasets} psych/man/range.correction.Rd0000644000176200001440000000457513256544656015737 0ustar liggesusers\name{rangeCorrection} \alias{rangeCorrection} \title{Correct correlations for restriction of range. (Thorndike Case 2) } \description{In applied settings, it is typical to find a correlation between a predictor and some criterion. Unfortunately, if the predictor is used to choose the subjects, the range of the predictor is seriously reduced. This restricts the observed correlation to be less than would be observed in the full range of the predictor. A correction for this problem is well known as Thorndike Case 2: Let R the unrestricted correlaton, r the restricted correlation, S the unrestricted standard deviation, s the restricted standard deviation, then R = (rS/s)/ sqrt(1-r^2 + r^2(S^2/s^2)). Several other cases of restriction were also considered by Thorndike and are implemented in \code{\link{rangeCorrection}}. } \usage{ rangeCorrection(r,sdu,sdr,sdxu=NULL,sdxr=NULL,case=2) } \arguments{ \item{r}{The observed correlation} \item{sdu}{The unrestricted standard deviation)} \item{sdr}{The restricted standard deviation} \item{sdxu}{Unrestricted standard deviation for case 4} \item{sdxr}{Restricted standard deviation for case 4} \item{case}{Which of the four Thurstone/Stauffer cases to use} } \details{ When participants in a study are selected on one variable, that will reduce the variance of that variable and the resulting correlation. Thorndike (1949) considered four cases of range restriction. Others have continued this discussion but have changed the case numbers. Can be used to find correlations in a restricted sample as well as the unrestricted sample. Not the same as the correction to reliability for restriction of range. } \value{The corrected correlation. } \references{ Revelle, William. (in prep) An introduction to psychometric theory with applications in R. Springer. Working draft available at \url{https://personality-project.org/r/book/} Stauffer, Joseph and Mendoza, Jorge. (2001) The proper sequence for correcting correlation coefficients for range restriction and unreliability. Psychometrika, 66, 63-68. } \author{ William Revelle } \seealso{ cRRr in the psychometric package. } \examples{ rangeCorrection(.33,100.32,48.19) #example from Revelle (in prep) Chapter 4. } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{ multivariate } \keyword{ models}% __ONLY ONE__ keyword per line psych/man/guttman.Rd0000644000176200001440000003261214524267270014136 0ustar liggesusers\name{splitHalf} \alias{splitHalf} \alias{guttman} \alias{tenberge} \alias{glb} \alias{glb.fa} \title{Alternative estimates of test reliabiity } \description{Eight alternative estimates of test reliability include the six discussed by Guttman (1945), four discussed by ten Berge and Zergers (1978) (\eqn{\mu_0 \dots \mu_3)} as well as \eqn{\beta} (the worst split half, Revelle, 1979), the glb (greatest lowest bound) discussed by Bentler and Woodward (1980), and \eqn{\omega_h} and \eqn{\omega_t} (McDonald, 1999; Zinbarg et al., 2005). Greatest and lowest split-half values are found by brute force or sampling. } \usage{ splitHalf(r,raw=FALSE,brute=FALSE,n.sample=15000,covar=FALSE,check.keys=TRUE, key=NULL,ci=.05,use="pairwise") guttman(r,key=NULL) tenberge(r) glb(r,key=NULL) glb.fa(r,key=NULL) } \arguments{ \item{r}{A correlation or covariance matrix or raw data matrix.} \item{raw}{return a vector of split half reliabilities} \item{brute}{Use brute force to try all combinations of n take n/2. Be careful. For e.g., n=24, this is 1,352,078 possible splits!} \item{n.sample}{If brute is false, how many samples of split halves should be tried? (16 items takes 12,780)} \item{covar}{Should the covariances or correlations be used for reliability calculations} \item{check.keys}{If TRUE, any item with a negative loading on the first factor will be flipped in sign} \item{key}{a vector of -1, 0, 1 to select or reverse key items. See \code{\link{scoreItems}} for an example of keying. If the key vector is less than the number of variables, then item numbers to be reverse can be specified.} \item{use}{Should we find the correlations using "pairwise" or "complete" (see ?cor)} \item{ci}{The alpha level to use for the confidence intervals of the split half estimates} } \details{Surprisingly, more than a century after Spearman (1904) introduced the concept of reliability to psychologists, there are still multiple approaches for measuring it. Although very popular, Cronbach's \eqn{\alpha} (1951) underestimates the reliability of a test and over estimates the first factor saturation. Using \code{\link{splitHalf}} for tests with 16 or fewer items, all possible splits may be found fairly easily. For tests with 17 or more items, n.sample splits are randomly found. Thus, for 16 or fewer items, the upper and lower bounds are precise. For 17 or more items, they are close but will probably slightly underestimate the highest and overestimate the lowest reliabilities. The guttman function includes the six estimates discussed by Guttman (1945), four of ten Berge and Zergers (1978), as well as Revelle's \eqn{\beta} (1979) using \code{\link{splitHalf}}. The companion function, \code{\link{omega}} calculates omega hierarchical (\eqn{\omega_h}) and omega total (\eqn{\omega_t}). Guttman's first estimate \eqn{\lambda_1} assumes that all the variance of an item is error: \deqn{ \lambda_1 = 1 - \frac{tr(\vec{V_x})}{V_x} = \frac{V_x - tr(\vec{V}_x)}{V_x} }{lambda 1= 1-tr(Vx)/Vx} This is a clear underestimate. The second bound, \eqn{\lambda_2}{\lambda_2}, replaces the diagonal with a function of the square root of the sums of squares of the off diagonal elements. Let \eqn{C_2 = \vec{1}( \vec{V}-diag(\vec{V})^2 \vec{1}' }, then \deqn{ \lambda_2 = \lambda_1 + \frac{\sqrt{\frac{n}{n-1}C_2}}{V_x} = \frac{V_x - tr(\vec{V}_x) + \sqrt{\frac{n}{n-1}C_2} }{V_x}}{\lambda_2= \lambda_1 + sqrt(n *(n-1)C_2)/V_x)} Effectively, this is replacing the diagonal with n * the square root of the average squared off diagonal element. Guttman's 3rd lower bound, \eqn{\lambda_3}, also modifies \eqn{\lambda_1} and estimates the true variance of each item as the average covariance between items and is, of course, the same as Cronbach's \eqn{\alpha}. \deqn{ \lambda_3 = \lambda_1 + \frac{\frac{V_X - tr(\vec{V}_X)}{n (n-1)}}{V_X} = \frac{n \lambda_1}{n-1} = \frac{n}{n-1}\Bigl(1 - \frac{tr(\vec{V})_x}{V_x}\Bigr) = \frac{n}{n-1} \frac{V_x - tr(\vec{V}_x)}{V_x} = \alpha }{\lambda 3 = ((n)/(n-1))(1-tr(Vx)/(Vx) = ((n)/(n-1))(Vx-tr(Vx)/Vx = \alpha} This is just replacing the diagonal elements with the average off diagonal elements. \eqn{\lambda_2 \geq \lambda_3}{\lambda_2 \ge \lambda_3} with \eqn{\lambda_2 > \lambda_3} if the covariances are not identical. \eqn{\lambda_3} and \eqn{\lambda_2} are both corrections to \eqn{\lambda_1} and this correction may be generalized as an infinite set of successive improvements. (Ten Berge and Zegers, 1978) \deqn{ \mu_r = \frac{1}{V_x} \bigl( p_o + (p_1 + (p_2 + \dots (p_{r-1} +( p_r)^{1/2})^{1/2} \dots )^{1/2})^{1/2} \bigr), r = 0, 1, 2, \dots }{(1/(Vx))(po + p1 = (p2 + ... (pr1) + pr^.5 )^.5^ ... .5)} where \deqn{ p_h = \sum_{i\ne j}\sigma_{ij}^{2h}, h = 0, 1, 2, \dots r-1 }{p_h = sum(\sigma^2h, h = 0, 1, 2, ... r-1 } and \deqn{ p_h = \frac{n}{n-1}\sigma_{ij}^{2h}, h = r }{p_h = n/((n-1) \sigma^2h) } tenberge and Zegers (1978). Clearly \eqn{\mu_0 = \lambda_3 = \alpha} and \eqn{ \mu_1 = \lambda_2}. \eqn{\mu_r \geq \mu_{r-1} \geq \dots \mu_1 \geq \mu_0}{\mu_r \ge \mu_{r-1} \ge \dots \mu_1 \ge \mu_0}, although the series does not improve much after the first two steps. Guttman's fourth lower bound, \eqn{\lambda_4} was originally proposed as any spit half reliability but has been interpreted as the greatest split half reliability. If \eqn{\vec{X}} is split into two parts, \eqn{\vec{X}_a} and \eqn{\vec{X}_b}, with correlation \eqn{r_{ab}} then \deqn{ \lambda_4 = 2\Bigl(1 - \frac{V_{X_a} + V_{X_b}}{V_X} \Bigr) = \frac{4 r_{ab}}{V_x} = \frac{4 r_{ab}}{V_{X_a} + V_{X_b}+ 2r_{ab}V_{X_a} V_{X_b}} }{\lambda 4 = 4rab/(Va + Vb + 2rabVaVb)} which is just the normal split half reliability, but in this case, of the most similar splits. For 16 or fewer items, this is found by trying all possible splits. For 17 or more items, this is estimated by taking n.sample random splits. \eqn{\lambda_5}, Guttman's fifth lower bound, replaces the diagonal values with twice the square root of the maximum (across items) of the sums of squared interitem covariances \deqn{ \lambda_5 = \lambda_1 + \frac{2 \sqrt{\bar{C_2}}}{V_X}. }{\lambda_5 = \lambda_1 +2/sqrt(average(C_2)/V_X.) } Although superior to \eqn{\lambda_1}, \eqn{\lambda_5} underestimates the correction to the diagonal. A better estimate would be analogous to the correction used in \eqn{\lambda_3}: \deqn{ \lambda_{5+} = \lambda_1 + \frac{n}{n-1}\frac{2 \sqrt{\bar{C_2}}}{V_X}. }{\lambda 5+ = \lambda 1 + ((n/(n-1))2/sqrt(av covariance 12)/Vx} \eqn{\lambda_6},Guttman's final bound considers the amount of variance in each item that can be accounted for the linear regression of all of the other items (the squared multiple correlation or smc), or more precisely, the variance of the errors, \eqn{e_j^2}, and is \deqn{\lambda_6 = 1 - \frac{\sum e_j^2}{V_x} = 1 - \frac{\sum(1-r_{smc}^2)}{V_x} }{\lambda 6 = 1 - sum(e^2)/Vx = 1-sum(1-r^2(smc))/Vx}. The smc is found from all the items. A modification to Guttman \eqn{\lambda_6}, \eqn{\lambda_6*} reported by the \code{\link{score.items}} function is to find the smc from the entire pool of items given, not just the items on the selected scale. Guttman's \eqn{\lambda_4} is the greatest split half reliability. Although originally found here by combining the output from three different approaches,this has now been replaced by using \code{\link{splitHalf}} to find the maximum value by brute force (for 16 or fewer items) or by taking a substantial number of random splits. The algorithms that had been tried before included: a) Do an ICLUST of the reversed correlation matrix. ICLUST normally forms the most distinct clusters. By reversing the correlations, it will tend to find the most related clusters. Truly a weird approach but tends to work. b) Alternatively, a kmeans clustering of the correlations (with the diagonal replaced with 0 to make pseudo distances) can produce 2 similar clusters. c) Clusters identified by assigning items to two clusters based upon their order on the first principal factor. (Highest to cluster 1, next 2 to cluster 2, etc.) These three procedures will produce keys vectors for assigning items to the two splits. The maximum split half reliability is found by taking the maximum of these three approaches. This is not elegant but is fast. The brute force and the sampling procedures seem to provide more stable and larger estimates. Yet another procedure, implemented in \code{\link{splitHalf}} is actually form all possible (for n items <= 16) or sample 15,000 (or more) split halfs corrected for test length. This function returns the best and worst splits as item keys that can be used for scoring purposes, if desired. Can do up to 24 items in reasonable time, but gets much slower for more than about 24 items. To do all possible splits of 24 items considers 1,352,078 splits. This will give an exact value, but this will not differ that much from random samples. For a 24 item problem with exactly 2 factors (by simulation), the worst split half is much lower than just random sampling would indicate. s Timings on a MacPro for a 24 item problem with a 2.4 GHz 8 core are .24 secs for the default 10,000 samples, .678 for 30,000 samples and 22.58 sec for all possible. The values of the maximum split for these sample sizes were .799,/.804 and .800 for three replications of the default sample size of 10000, .805 and .806 for two sets of 30,000 and .812 for an exhaustive search. There are three greatest lower bound functions. One, glb finds the greatest split half reliability, \eqn{\lambda_4}. This considers the test as set of items and examines how best to partition the items into splits. The other two, \code{\link{glb.fa}} and \code{\link{glb.algebraic}}, are alternative ways of weighting the diagonal of the matrix. \code{\link{glb.fa}} estimates the communalities of the variables from a factor model where the number of factors is the number with positive eigen values. Then reliability is found by \deqn{ glb = 1 - \frac{\sum e_j^2}{V_x} = 1 - \frac{\sum(1- h^2)}{V_x} }{glb = 1 - sum(e^2)/Vx = 1-sum(1-h^2)/Vx} This estimate will differ slightly from that found by \code{\link{glb.algebraic}}, written by Andreas Moeltner which uses calls to csdp in the Rcsdp package. His algorithm, which more closely matches the description of the glb by Jackson and Woodhouse, seems to have a positive bias (i.e., will over estimate the reliability of some items; they are said to be = 1) for small sample sizes. More exploration of these two algorithms is underway. Compared to \code{\link{glb.algebraic}}, \code{\link{glb.fa}} seems to have less (positive) bias for smallish sample sizes (n < 500) but larger for large (> 1000) sample sizes. This interacts with the number of variables so that equal bias sample size differs as a function of the number of variables. The differences are, however small. As samples sizes grow, \code{\link{glb.algebraic}} seems to converge on the population value while glb.fa has a positive bias. } \value{ \item{beta}{The worst split half reliability. This is an estimate of the general factor saturation.} \item{maxrb}{The maximimum split half reliability. This is Guttman's lambda 4} \item{alpha}{Also known as Guttman's Lambda 3} \item{ci}{The 2.5\%, 50\%, and 97.5\% values of the raw or sampled split half. Note that it necessary to specify raw=TRUE to get these.} \item{tenberge$mu1}{tenBerge mu 1 is functionally alpha} \item{tenberge$mu2}{one of the sequence of estimates mu1 ... mu3} \item{glb}{glb found from factor analysis} } \references{ Cronbach, L.J. (1951) Coefficient alpha and the internal strucuture of tests. Psychometrika, 16, 297-334. Guttman, L. (1945). A basis for analyzing test-retest reliability. Psychometrika, 10 (4), 255-282. Revelle, W. (1979). Hierarchical cluster-analysis and the internal structure of tests. Multivariate Behavioral Research, 14 (1), 57-74. Revelle, W. and Condon, D.M. (2019) Reliability from alpha to omega: A tutorial. Psychological Assessment, 31, 12, 1395-1411. DOI: 10.1037/pas0000754. \url{https://osf.io/preprints/psyarxiv/2y3w9} Preprint available from PsyArxiv Revelle, W. and Zinbarg, R. E. (2009) Coefficients alpha, beta, omega and the glb: comments on Sijtsma. Psychometrika, 2009. Ten Berge, J. M. F., & Zegers, F. E. (1978). A series of lower bounds to the reliability of a test. Psychometrika, 43 (4), 575-579. Zinbarg, R. E., Revelle, W., Yovel, I., & Li, W. (2005). Cronbach's \eqn{\alpha} , Revelle's \eqn{\beta} , and McDonald's \eqn{\omega_h} ): Their relations with each other and two alternative conceptualizations of reliability. \doi{10.1007/s11336-003-0974-7} Psychometrika, 70 (1), 123-133. } \author{ William Revelle } \seealso{ \code{\link{reliability}}, \code{\link{alpha}}, \code{\link{omega}}, \code{\link{ICLUST}}, \code{\link{unidim}}, \code{\link{glb.algebraic}} } \examples{ data(attitude) splitHalf(attitude) splitHalf(attitude,covar=TRUE) #do it on the covariances temp <- splitHalf(attitude,raw=TRUE) temp$ci #to show the confidence intervals, you need to specify that raw=TRUE glb(attitude) glb.fa(attitude) if(require(Rcsdp)) {glb.algebraic(cor(attitude)) } guttman(attitude) #to show the histogram of all possible splits for the ability test #sp <- splitHalf(psychTools::ability,raw=TRUE) #this saves the results #hist(sp$raw,breaks=101,ylab="SplitHalf reliability",main="SplitHalf # reliabilities of a test with 16 ability items") sp <- splitHalf(psychTools::bfi[1:10],key=c(1,9,10)) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{multivariate } psych/man/irt.fa.Rd0000644000176200001440000002776014262412621013641 0ustar liggesusers\name{irt.fa} \alias{irt.fa} \alias{irt.select} \alias{fa2irt} \title{Item Response Analysis by Exploratory Factor Analysis of tetrachoric/polychoric correlations} \description{ Although exploratory factor analysis and Item Response Theory seem to be very different models of binary data, they can provide equivalent parameter estimates of item difficulty and item discrimination. Tetrachoric or polychoric correlations of a data set of dichotomous or polytomous items may be factor analysed using a minimum residual or maximum likelihood factor analysis and the result loadings transformed to item discrimination parameters. The tau parameter from the tetrachoric/polychoric correlations combined with the item factor loading may be used to estimate item difficulties. } \usage{ irt.fa(x,nfactors=1,correct=TRUE,plot=TRUE,n.obs=NULL,rotate="oblimin",fm="minres", sort=FALSE,...) irt.select(x,y) fa2irt(f,rho,plot=TRUE,n.obs=NULL) } \arguments{ \item{x}{A data matrix of dichotomous or discrete items, or the result of \code{\link{tetrachoric}} or \code{\link{polychoric}} } \item{nfactors}{Defaults to 1 factor} \item{correct}{If true, then correct the tetrachoric correlations for continuity. (See \code{\link{tetrachoric}}). } \item{plot}{If TRUE, automatically call the \code{\link{plot.irt}} or \code{\link{plot.poly}} functions.} \item{y}{the subset of variables to pick from the rho and tau output of a previous irt.fa analysis to allow for further analysis.} \item{n.obs}{The number of subjects used in the initial analysis if doing a second analysis of a correlation matrix. In particular, if using the fm="minchi" option, this should be the matrix returned by \code{\link{count.pairwise}}.} \item{rotate}{The default rotation is oblimin. See \code{\link{fa}} for the other options.} \item{fm}{The default factor extraction is minres. See \code{\link{fa}} for the other options.} \item{f}{The object returned from \code{\link{fa}} } \item{rho}{The object returned from \code{\link{polychoric}} or \code{\link{tetrachoric}}. This will include both a correlation matrix and the item difficulty levels.} \item{sort}{Should the factor loadings be sorted before preparing the item information tables. Defaults to FALSE as this is more useful for scoring items. For tabular output it is better to have sort=TRUE.} \item{...}{Additional parameters to pass to the factor analysis function} } \details{ \code{\link{irt.fa}} combines several functions into one to make the process of item response analysis easier. Correlations are found using either \code{\link{tetrachoric}} or \code{\link{polychoric}}. Exploratory factor analyeses with all the normal options are then done using \code{\link{fa}}. The results are then organized to be reported in terms of IRT parameters (difficulties and discriminations) as well as the more conventional factor analysis output. In addition, because the correlation step is somewhat slow, reanalyses may be done using the correlation matrix found in the first step. In this case, if it is desired to use the fm="minchi" factoring method, the number of observations needs to be specified as the matrix resulting from \code{\link{pairwiseCount}}. The tetrachoric correlation matrix of dichotomous items may be factored using a (e.g.) minimum residual factor analysis function \code{\link{fa}} and the resulting loadings, \eqn{\lambda_i} are transformed to discriminations by \eqn{\alpha = \frac{\lambda_i}{\sqrt{1-\lambda_i^2}} }{a = \lambda / (sqrt(1-\lambda^2)}. The difficulty parameter, \eqn{\delta} is found from the \eqn{\tau} parameter of the \code{\link{tetrachoric}} or \code{\link{polychoric}} function. \eqn{\delta_i = \frac{\tau_i}{\sqrt{1-\lambda_i^2}}}{\delta = \tau / (sqrt(1-\lambda^2)} Similar analyses may be done with discrete item responses using polychoric correlations and distinct estimates of item difficulty (location) for each item response. The results may be shown graphically using \code{\link{plot.irt}} for dichotomous items or \code{\link{plot.poly}} for polytomous items. These are called by plotting the \code{\link{irt.fa}} output, see the examples). For plotting there are three options: type = "ICC" will plot the item characteristic response function. type = "IIC" will plot the item information function, and type= "test" will plot the test information function. Invisible output from the plot function will return tables of item information as a function of several levels of the trait, as well as the standard error of measurement and the reliability at each of those levels. The normal input is just the raw data. If, however, the correlation matrix has already been found using \code{\link{tetrachoric}}, \code{\link{polychoric}}, or a previous analysis using \code{\link{irt.fa}} then that result can be processed directly. Because \code{\link{irt.fa}} saves the rho and tau matrices from the analysis, subsequent analyses of the same data set are much faster if the input is the object returned on the first run. A similar feature is available in \code{\link{omega}}. The output is best seen in terms of graphic displays. Plot the output from irt.fa to see item and test information functions. The print function will print the item location and discriminations. The additional factor analysis output is available as an object in the output and may be printed directly by specifying the $fa object. The \code{\link{irt.select}} function is a helper function to allow for selecting a subset of a prior analysis for further analysis. First run irt.fa, then select a subset of variables to be analyzed in a subsequent irt.fa analysis. Perhaps a better approach is to just plot and find the information for selected items. The plot function for an irt.fa object will plot ICC (item characteristic curves), IIC (item information curves), or test information curves. In addition, by using the "keys" option, these three kinds of plots can be done for selected items. This is particularly useful when trying to see the information characteristics of short forms of tests based upon the longer form factor analysis. The plot function will also return (invisibly) the informaton at multiple levels of the trait, the average information (area under the curve) as well as the location of the peak information for each item. These may be then printed or printed in sorted order using the sort option in print. } \value{ \item{irt }{A list of Item location (difficulty) and discrimination} \item{fa }{A list of statistics for the factor analyis} \item{rho}{The tetrachoric/polychoric correlation matrix} \item{tau}{The tetrachoric/polychoric cut points} } \references{ Kamata, Akihito and Bauer, Daniel J. (2008) A Note on the Relation Between Factor Analytic and Item Response Theory Models Structural Equation Modeling, 15 (1) 136-153. McDonald, Roderick P. (1999) Test theory: A unified treatment. L. Erlbaum Associates. Revelle, William. (in prep) An introduction to psychometric theory with applications in R. Springer. Working draft available at \url{https://personality-project.org/r/book/} } \author{William Revelle} \note{ \code{\link{irt.fa}} makes use of the \code{\link{tetrachoric}} or \code{\link{tetrachoric}} functions. Both of these will use multiple cores if this is an option. To set these use options("mc.cores"=x) where x is the number of cores to use. (Macs default to 2, PCs seem to default to 1). In comparing irt.fa to the ltm function in the ltm package or to the analysis reported in Kamata and Bauer (2008) the discrimination parameters are not identical, because the irt.fa reports them in units of the normal curve while ltm and Kamata and Bauer report them in logistic units. In addition, Kamata and Bauer do their factor analysis using a logistic error model. Their results match the irt.fa results (to the 2nd or 3rd decimal) when examining their analyses using a normal model. (With thanks to Akihito Kamata for sharing that analysis.) \code{\link{irt.fa}} reports parameters in normal units. To convert them to conventional IRT parameters, multiply by 1.702. In addition, the location parameter is expressed in terms of difficulty (high positive scores imply lower frequency of response.) The results of \code{\link{irt.fa}} can be used by \code{\link{score.irt}} for irt based scoring. First run \code{\link{irt.fa}} and then score the results using a two parameter model using \code{\link{score.irt}}. There is also confusion in the literature of how to interpret the tau parameter. Here I treat it the item threshold for a response, which is to say that tau reflects item difficulty. Further note that the rho parameter in the \code{\link{fa2irt}} function is the result from a \code{\link{tetrachoric}} or \code{\link{polychoric}} analysis and reflects both the correlations and the item thresholds. } \seealso{\code{\link{fa}}, \code{\link{sim.irt}}, \code{\link{tetrachoric}}, \code{\link{polychoric}} as well as \code{\link{plot.psych}} for plotting the IRT item curves. The use of \code{\link{fa.extend}} is helpful for the case of an incomplete block design to find the factor solution. See also \code{\link{score.irt}} for scoring items based upon these parameter estimates. \code{\link{irt.responses}} will plot the empirical response curves for the alternative response choices for multiple choice items. } \examples{ \dontrun{ set.seed(17) d9 <- sim.irt(9,1000,-2.5,2.5,mod="normal") #dichotomous items test <- irt.fa(d9$items) test op <- par(mfrow=c(3,1)) plot(test,type="ICC") plot(test,type="IIC") plot(test,type="test") par(op) #compare this result to first finding the correlations, then factoring them, and # then applying fa2irt. R <- tetrachoric(d9$items,correct=TRUE) f <- fa(R$rho) test2 <- fa2irt(f,R) test2 test2$plot$sumInfo[[1]] - test$plot$sumInfo[[1]] #identical results set.seed(17) items <- sim.congeneric(N=500,short=FALSE,categorical=TRUE) #500 responses to 4 discrete items d4 <- irt.fa(items$observed) #item response analysis of congeneric measures d4 #show just the irt output d4$fa #show just the factor analysis output op <- par(mfrow=c(2,2)) plot(d4,type="ICC") par(op) #using the iq data set for an example of real items #first need to convert the responses to tf data(iqitems) iq.keys <- c(4,4,4, 6, 6,3,4,4, 5,2,2,4, 3,2,6,7) iq.tf <- score.multiple.choice(iq.keys,psychTools::iqitems,score=FALSE) #just the responses iq.irt <- irt.fa(iq.tf) print(iq.irt,short=FALSE) #show the IRT as well as factor analysis output p.iq <- plot(iq.irt) #save the invisible summary table p.iq #show the summary table of information by ability level #select a subset of these variables small.iq.irt <- irt.select(iq.irt,c(1,5,9,10,11,13)) small.irt <- irt.fa(small.iq.irt) plot(small.irt) #find the information for three subset of iq items keys <- make.keys(16,list(all=1:16,some=c(1,5,9,10,11,13),others=c(1:5))) plot(iq.irt,keys=keys) } #compare output to the ltm package or Kamata and Bauer -- these are in logistic units ls <- irt.fa(lsat6) #library(ltm) # lsat.ltm <- ltm(lsat6~z1) # round(coefficients(lsat.ltm)/1.702,3) #convert to normal (approximation) # # Dffclt Dscrmn #Q1 -1.974 0.485 #Q2 -0.805 0.425 #Q3 -0.164 0.523 #Q4 -1.096 0.405 #Q5 -1.835 0.386 #Normal results ("Standardized and Marginal")(from Akihito Kamata ) #Item discrim tau # 1 0.4169 -1.5520 # 2 0.4333 -0.5999 # 3 0.5373 -0.1512 # 4 0.4044 -0.7723 # 5 0.3587 -1.1966 #compare to ls #Normal results ("Standardized and conditional") (from Akihito Kamata ) #item discrim tau # 1 0.3848 -1.4325 # 2 0.3976 -0.5505 # 3 0.4733 -0.1332 # 4 0.3749 -0.7159 # 5 0.3377 -1.1264 #compare to ls$fa and ls$tau #Kamata and Bauer (2008) logistic estimates #1 0.826 2.773 #2 0.723 0.990 #3 0.891 0.249 #4 0.688 1.285 #5 0.657 2.053 } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{ multivariate } \keyword{ models} psych/man/describe.Rd0000755000176200001440000002257613724332064014245 0ustar liggesusers\name{describe} \alias{describe} \alias{describeData} \alias{describeFast} \title{ Basic descriptive statistics useful for psychometrics } \description{ There are many summary statistics available in R; this function provides the ones most useful for scale construction and item analysis in classic psychometrics. Range is most useful for the first pass in a data set, to check for coding errors. } \usage{ describe(x, na.rm = TRUE, interp=FALSE,skew = TRUE, ranges = TRUE,trim=.1, type=3,check=TRUE,fast=NULL,quant=NULL,IQR=FALSE,omit=FALSE,data=NULL) describeData(x,head=4,tail=4) describeFast(x) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{ A data frame or matrix} \item{na.rm}{The default is to delete missing data. na.rm=FALSE will delete the case. } \item{interp}{Should the median be standard or interpolated} \item{skew}{ Should the skew and kurtosis be calculated? } \item{ranges}{ Should the range be calculated? } \item{trim}{trim=.1 -- trim means by dropping the top and bottom trim fraction} \item{type}{Which estimate of skew and kurtosis should be used? (See details.) } \item{check}{Should we check for non-numeric variables? Slower but helpful.} \item{fast}{if TRUE, will do n, means, sds, min, max, ranges for an improvement in speed. If NULL, will switch to fast mode for large (ncol * nrow > 10^7) problems, otherwise defaults to fast = FALSE} \item{quant}{if not NULL, will find the specified quantiles (e.g. quant=c(.25,.75) will find the 25th and 75th percentiles)} \item{IQR}{If TRUE, show the interquartile range} \item{omit}{Do not convert non-numerical variables to numeric, omit them instead} \item{head}{show the first 1:head cases for each variable in describeData} \item{tail}{Show the last nobs-tail cases for each variable in describeData} \item{data}{Allows formula input for specific grouping variables} } \details{In basic data analysis it is vital to get basic descriptive statistics. Procedures such as \code{\link{summary}} and Hmisc::describe do so. The describe function in the \code{\link{psych}} package is meant to produce the most frequently requested stats in psychometric and psychology studies, and to produce them in an easy to read data.frame. If a grouping variable is called for in formula mode, it will also call \code{\link{describeBy}} to the processing. The results from describe can be used in graphics functions (e.g., \code{\link{error.crosses}}). The range statistics (min, max, range) are most useful for data checking to detect coding errors, and should be found in early analyses of the data. Although describe will work on data frames as well as matrices, it is important to realize that for data frames, descriptive statistics will be reported only for those variables where this makes sense (i.e., not for alphanumeric data). If the check option is TRUE, variables that are categorical or logical are converted to numeric and then described. These variables are marked with an * in the row name. This is somewhat slower. Note that in the case of categories or factors, the numerical ordering is not necessarily the one expected. For instance, if education is coded "high school", "some college" , "finished college", then the default coding will lead to these as values of 2, 3, 1. Thus, statistics for those variables marked with * should be interpreted cautiously (if at all). In a typical study, one might read the data in from the clipboard (\code{\link[psychTools]{read.clipboard}}), show the splom plot of the correlations (\code{\link{pairs.panels}}), and then describe the data. na.rm=FALSE is equivalent to describe(na.omit(x)) When finding the skew and the kurtosis, there are three different options available. These match the choices available in skewness and kurtosis found in the e1071 package (see Joanes and Gill (1998) for the advantages of each one). If we define \eqn{m_r = [\sum(X- mx)^r]/n}{m_r = [sum(X- mx)^r]/n} then Type 1 finds skewness and kurtosis by \eqn{g_1 = m_3/(m_2)^{3/2} } and \eqn{g_2 = m_4/(m_2)^2 -3}. Type 2 is \eqn{G1 = g1 * \sqrt{n *(n-1)}/(n-2)} and \eqn{G2 = (n-1)*[(n+1)g2 +6]/((n-2)(n-3))}. Type 3 is \eqn{b1 = [(n-1)/n]^{3/2} m_3/m_2^{3/2}} and \eqn{b2 = [(n-1)/n]^{3/2} m_4/m_2^2)}. The additional helper function \code{\link{describeData}} just scans the data array and reports on whether the data are all numerical, logical/factorial, or categorical. This is a useful check to run if trying to get descriptive statistics on very large data sets where to improve the speed, the check option is FALSE. An even faster overview of the data is \code{\link{describeFast}} which reports the number of total cases, number of complete cases, number of numeric variables and the number which are factors. The fast=TRUE option will lead to a speed up of about 50\% for larger problems by not finding all of the statistics (see NOTE) To describe the data for different groups, see \code{\link{describeBy}} or specify the grouping variable(s) in formula mode (see the examples). } \value{ A data.frame of the relevant statistics: \cr item name \cr item number \cr number of valid cases\cr mean\cr standard deviation\cr trimmed mean (with trim defaulting to .1) \cr median (standard or interpolated\cr mad: median absolute deviation (from the median). \cr minimum\cr maximum\cr skew\cr kurtosis\cr standard error\cr } \note{For very large data sets that are data.frames, \code{\link{describe}} can be rather slow. Converting the data to a matrix first is recommended. However, if the data are of different types, (factors or logical), this is not possible. If the data set includes columns of character data, it is also not possible. Thus, a quick pass with \code{\link{describeData}} is recommended. Even faster is a quick pass with \code{\link{describeFast}} which just counts number of observations per variable and reports the type of data (numerical, factor, logical). For the greatest speed, at the cost of losing information, do not ask for ranges or for skew and turn off check. This is done automatically if the fast option is TRUE or for large data sets. Note that by default, fast=NULL. But if the number of cases x number of variables exceeds (ncol * nrow > 10^7), fast will be set to TRUE. This will provide just n, mean, sd, min, max, range, and standard errors. To get all of the statistics (but at a cost of greater time) set fast=FALSE. The problem seems to be a memory limitation in that the time taken is an accelerating function of nvars * nobs. Thus, for a largish problem (72,000 cases with 1680 variables) which might take 330 seconds, doing it as two sets of 840 variable cuts the time down to 80 seconds. The object returned is a data frame with the normal precision of R. However, to control the number of digits displayed, you can set digits in a print command, rather than losing precision at the descriptive stats level. See the last two examples. One just sets the number of digits, one gives uses signif to make 'prettier' output where all numbers are displayed to the same number of digits. The MAD (median absolute deviation from the median) is calculated using the mad function from the stats package in Core-R. Note that by default, the MAD is adjusted by a scaling factor (1.4826) that will give the expectation of the MAD to be the same as the standard deviation for normal data. An interesting problem with describe is that a function with the same name is in the Hmisc package. HMisc is loaded by qqgraph which in turn is loaded by SemPlot. So even if not directly loading HMisc, if you load SemPlot after loading psych, describe will not work, but the reverse order for loading should work. } \author{ \url{https://personality-project.org/revelle.html} \cr Maintainer: William Revelle \email{revelle@northwestern.edu} \cr } \references{Joanes, D.N. and Gill, C.A (1998). Comparing measures of sample skewness and kurtosis. The Statistician, 47, 183-189.} \seealso{ \code{\link{describeBy}}, \code{\link{skew}}, \code{\link{kurtosi}} \code{\link{interp.median}}, \code{\link[psychTools]{read.clipboard}}. Then, for graphic output, see \code{\link{error.crosses}}, \code{\link{pairs.panels}}, \code{\link{error.bars}}, \code{\link{error.bars.by}} and \code{\link{densityBy}}, or \code{\link{violinBy}}} \examples{ data(sat.act) describe(sat.act) describe(sat.act ~ gender) #formula mode option calls describeBy for the entire data frame describe(SATV + SATQ ~ gender, data=sat.act) #formula mode specifies just two variables describe(sat.act,skew=FALSE) describe(sat.act,IQR=TRUE) #show the interquartile Range describe(sat.act,quant=c(.1,.25,.5,.75,.90) ) #find the 10th, 25th, 50th, #75th and 90th percentiles describeData(sat.act) #the fast version just gives counts and head and tail print(describeFast(sat.act),short=FALSE) #even faster is just counts (just less information) #now show how to adjust the displayed number of digits des <- describe(sat.act) #find the descriptive statistics. Keep the original accuracy des #show the normal output, which is rounded to 2 decimals print(des,digits=3) #show the output, but round to 3 (trailing) digits print(des, signif=3) #round all numbers to the 3 significant digits } \keyword{ multivariate }% at least one, from doc/KEYWORDS \keyword{ models }% __ONLY ONE__ keyword per line \keyword{univar} psych/man/bifactor.Rd0000644000176200001440000002126514273320770014246 0ustar liggesusers\name{Bechtoldt} \alias{Bechtoldt.1} \alias{Bechtoldt.2} \alias{Bechtoldt} \alias{Holzinger} \alias{Holzinger.9} \alias{Reise} \alias{Thurstone} \alias{Thurstone.33} \alias{Thurstone.33G} \alias{Thurstone.9} \docType{data} \title{Seven data sets showing a bifactor solution.} \description{Holzinger-Swineford (1937) introduced the bifactor model of a general factor and uncorrelated group factors. The Holzinger data sets are original 14 * 14 matrix from their paper as well as a 9 *9 matrix used as an example by Joreskog. The Thurstone correlation matrix is a 9 * 9 matrix of correlations of ability items. The Reise data set is 16 * 16 correlation matrix of mental health items. The Bechtholdt data sets are both 17 x 17 correlation matrices of ability tests. } \usage{ data(Thurstone) data(Thurstone.33) data(Thurstone.33G) data(Thurstone.9) data(Holzinger) data(Holzinger.9) data(Bechtoldt) data(Bechtoldt.1) data(Bechtoldt.2) data(Reise) } \details{Holzinger and Swineford (1937) introduced the bifactor model (one general factor and several group factors) for mental abilities. This is a nice demonstration data set of a hierarchical factor structure that can be analyzed using the \code{\link{omega}} function or using sem. The bifactor model is typically used in measures of cognitive ability. There are several ways to analyze such data. One is to use the \code{\link{omega}} function to do a hierarchical factoring using the Schmid-Leiman transformation. This can then be done as an exploratory and then as a confirmatory model using \code{\link{omegaSem}}. Another way is to do a regular factor analysis and use either a \code{\link{bifactor}} or \code{\link{biquartimin}} rotation. These latter two functions implement the Jennrich and Bentler (2011) bifactor and biquartimin transformations. The \code{\link{bifactor}} rotation suffers from the problem of local minima (Mansolf and Reise, 2016) and thus a mixture of exploratory and confirmatory analysis might be preferred. The 14 variables are ordered to reflect 3 spatial tests, 3 mental speed tests, 4 motor speed tests, and 4 verbal tests. The sample size is 355. Another data set from Holzinger (Holzinger.9) represents 9 cognitive abilities (Holzinger, 1939) and is used as an example by Karl Joreskog (2003) for factor analysis by the MINRES algorithm and also appears in the LISREL manual as example NPV.KM. This data set represents the scores from the Grant White middle school for 9 tests: "t01_visperc" "t02_cubes" "t04_lozenges" "t06_paracomp" "t07_sentcomp" "t09_wordmean" "t10_addition" "t12_countdot" and "t13_sccaps" and as variables x1 ... x9 (for the Grant-White school) in the lavaan package. Another classic data set is the 9 variable Thurstone problem which is discussed in detail by R. P. McDonald (1985, 1999) and and is used as example in the sem package as well as in the PROC CALIS manual for SAS. These nine tests were grouped by Thurstone and Thurstone, 1941 (based on other data) into three factors: Verbal Comprehension, Word Fluency, and Reasoning. The original data came from Thurstone and Thurstone (1941) but were reanalyzed by Bechthold (1961) who broke the data set into two. McDonald, in turn, selected these nine variables from the larger set of 17 found in Bechtoldt.2. The sample size is 213. Another set of 9 cognitive variables attributed to Thurstone (1933) is the data set of 4,175 male students reported by Carl Brigham of Princeton to the College Entrance Examination Board. (Brigham, 1932, Table VIII p 352.) This set does not show a clear bifactor solution but is included as a demonstration of the differences between a maximimum likelihood factor analysis solution versus a principal axis factor solution. On page 352, we are given reliability estimates of .86, .73, .83, .97,.94, .85, .92, .92, and .90. These are based upon sampling 743 boys. A parallel set (also from Brigham) is the correlation matrix of the same 9 variables for 2899 girls. (Thurstone.33G) Tucker (1958) uses 9 variables from Thurstone and Thurstone (1941) for his example of \code{\link{interbattery}} factor analysis. More recent applications of the bifactor model are to the measurement of psychological status. The Reise data set is a correlation matrix based upon >35,000 observations to the Consumer Assessment of Health Care Provideers and Systems survey instrument. Reise, Morizot, and Hays (2007) describe a bifactor solution based upon 1,000 cases. The five factors from Reise et al. reflect Getting care quickly (1-3), Doctor communicates well (4-7), Courteous and helpful staff (8,9), Getting needed care (10-13), and Health plan customer service (14-16). The two Bechtoldt data sets are two samples from Thurstone and Thurstone (1941). They include 17 variables, 9 of which were used by McDonald to form the Thurstone data set. The sample sizes are 212 and 213 respectively. The six proposed factors reflect memory, verbal, words, space, number and reasoning with three markers for all expect the rote memory factor. 9 variables from this set appear in the Thurstone data set. Two more data sets with similar structures are found in the \code{\link{Harman}} data set. This includes the another 9 variables (with 696 subjects) from Holzinger used by Harman \code{link{Harman.Holzinger}} as well as 8 affective variables from \code{link{burt}}. Another data set that is worth examining for tests of bifactor structure is the holzinger.swineford data set which includes the original data from Holzinger and Swineford (1939) supplied by Keith Widaman. This is in the psychTools package. \itemize{ \item Bechtoldt.1: 17 x 17 correlation matrix of ability tests, N = 212. \item Bechtoldt.2: 17 x 17 correlation matrix of ability tests, N = 213. \item Holzinger: 14 x 14 correlation matrix of ability tests, N = 355 \item Holzinger.9: 9 x 9 correlation matrix of ability tests, N = 145 \item Reise: 16 x 16 correlation matrix of health satisfaction items. N = 35,000 \item Thurstone: 9 x 9 correlation matrix of ability tests, N = 213 \item Thurstone.33: Another 9 x 9 correlation matrix of ability tests, N=4175 \item Thurstone:9: And yet another 9 x 9 correlation matrix of ability tests, N =710 } } \source{Holzinger: Holzinger and Swineford (1937) \cr Reise: Steve Reise (personal communication) \cr sem help page (for Thurstone) Brigham (for Thurstone.33) } \references{ Bechtoldt, Harold, (1961). An empirical study of the factor analysis stability hypothesis. Psychometrika, 26, 405-432. Brigham, Carl C. (1932) A study of errors. College Entrance Examination Board. Holzinger, Karl and Swineford, Frances (1937) The Bi-factor method. Psychometrika, 2, 41-54 Holzinger, K., & Swineford, F. (1939). A study in factor analysis: The stability of a bifactor solution. Supplementary Educational Monograph, no. 48. Chicago: University of Chicago Press. McDonald, Roderick P. (1999) Test theory: A unified treatment. L. Erlbaum Associates. Mahwah, N.J. Mansolf, Maxwell and Reise, Steven P. (2016) Exploratory Bifactor Analysis: The Schmid-Leiman Orthogonalization and Jennrich-Bentler Analytic Rotations, Multivariate Behavioral Research, 51:5, 698-717, DOI: 10.1080/00273171.2016.1215898 Reise, Steven and Morizot, Julien and Hays, Ron (2007) The role of the bifactor model in resolving dimensionality issues in health outcomes measures. Quality of Life Research. 16, 19-31. Thurstone, Louis Leon (1933) The theory of multiple factors. Edwards Brothers, Inc. Ann Arbor. Thurstone, Louis Leon and Thurstone, Thelma (Gwinn). (1941) Factorial studies of intelligence. The University of Chicago Press. Chicago, Il. Tucker, Ledyard (1958) An inter-battery method of factor analysis, Psychometrika, 23, 111-136. } \note{Note that these are tests, not items. Thus, it was possible to find the reliabilities of each test. For the Holzinger 14 tests these were found from 1- t2 where t = c(.332, .517, .360, .382, .354,.249, .444, .393, .455, .424, .393, .487, .534, .382) (page 53) and thus the reliabilities were 0.890, 0.733, 0.870, 0.854, 0.875, 0.938, 0.803, 0.846, 0.793, 0.820, 0.846, 0.763, 0.715, 0.854. For the Holzinger.9 tests, the reliabilities for the Grant-White tests were: .76, .57, .94, .65, .75, .87, .95, .84 and .89 (Keith Widaman, personal communication, 2020), } \examples{ if(!require(GPArotation)) {message("I am sorry, to run omega requires GPArotation") } else { #holz <- omega(Holzinger,4, title = "14 ability tests from Holzinger-Swineford") #bf <- omega(Reise,5,title="16 health items from Reise") #omega(Reise,5,labels=colnames(Reise),title="16 health items from Reise") thur.om <- omega(Thurstone,title="9 variables from Thurstone") #compare with thur.bf <- fa(Thurstone,3,rotate="biquartimin") factor.congruence(thur.om,thur.bf) } } \keyword{datasets} psych/man/outlier.Rd0000644000176200001440000000300512365750063014132 0ustar liggesusers\name{outlier} \alias{outlier} \title{Find and graph Mahalanobis squared distances to detect outliers} \description{ The Mahalanobis distance is \eqn{D^2 = (x-\mu)' \Sigma^-1 (x-\mu)} where \eqn{\Sigma} is the covariance of the x matrix. D2 may be used as a way of detecting outliers in distribution. Large D2 values, compared to the expected Chi Square values indicate an unusual response pattern. The mahalanobis function in stats does not handle missing data. } \usage{ outlier(x, plot = TRUE, bad = 5,na.rm = TRUE, xlab, ylab, ...) } \arguments{ \item{x}{A data matrix or data.frame} \item{plot}{Plot the resulting QQ graph} \item{bad}{Label the bad worst values} \item{na.rm}{Should missing data be deleted} \item{xlab}{Label for x axis} \item{ylab}{Label for y axis} \item{\dots}{More graphic parameters, e.g., cex=.8} } \details{ Adapted from the mahalanobis function and help page from stats. } \value{The D2 values for each case} \references{ Yuan, Ke-Hai and Zhong, Xiaoling, (2008) Outliers, Leverage Observations, and Influential Cases in Factor Analysis: Using Robust Procedures to Minimize Their Effect, Sociological Methodology, 38, 329-368. } \author{William Revelle} \seealso{ \code{\link{mahalanobis}} } \examples{ #first, just find and graph the outliers d2 <- outlier(sat.act) #combine with the data frame and plot it with the outliers highlighted in blue sat.d2 <- data.frame(sat.act,d2) pairs.panels(sat.d2,bg=c("yellow","blue")[(d2 > 25)+1],pch=21) } \keyword{multivariate } \keyword{ models } psych/man/tr.Rd0000644000176200001440000000125111027440031013056 0ustar liggesusers\name{tr} \alias{tr} \title{Find the trace of a square matrix} \description{Hardly worth coding, if it didn't appear in so many formulae in psychometrics, the trace of a (square) matrix is just the sum of the diagonal elements. } \usage{ tr(m) } \arguments{ \item{m}{A square matrix } } \details{The tr function is used in various matrix operations and is the sum of the diagonal elements of a matrix. } \value{ The sum of the diagonal elements of a square matrix. \cr i.e. tr(m) <- sum(diag(m)). } \examples{ m <- matrix(1:16,ncol=4) m tr(m) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{ multivariate } psych/man/Harman.Rd0000644000176200001440000001573513663344674013703 0ustar liggesusers\name{Harman} \alias{Harman} \alias{Harman.Burt} \alias{Harman.Holzinger} \alias{Harman.political} \alias{Harman.5} \alias{Harman.8} \docType{data} \title{Five data sets from Harman (1967). 9 cognitive variables from Holzinger and 8 emotional variables from Burt} \description{Five classic data sets reported by Harman (1967) are 9 psychological (cognitive) variables taken from Holzinger and 8 emotional variables taken from Burt. Two others are socioeconomic and political data sets. Additionally, 8 physical variables. All five of these are used for tests and demonstrations of various factoring algortithms. } \usage{data(Harman) data(Harman.5) data(Harman.political) data(Harman.8) } \details{ \itemize{ \item Harman.Holzinger: 9 x 9 correlation matrix of ability tests, N = 696. \item Harman.Burt: a 8 x 8 correlation matrix of ``emotional" items. N = 172 \item Harman.5: 12 census tracts for 5 socioeconomic data (Harman p 14) \item Harman.political: p 166. \item Harman.8 8 physical measures } Harman.Holzinger. The nine psychological variables from Harman (1967, p 244) are taken from unpublished class notes of K.J. Holzinger with 696 participants. This is a subset of 12 tests with 4 factors. It is yet another nice example of a bifactor solution. Bentler (2007) uses this data set to discuss reliablity analysis. The data show a clear bifactor structure and are a nice example of the various estimates of reliability included in the \code{\link{omega}} function. Should not be confused with the \code{\link{Holzinger}} or \code{\link{Holzinger.9}} data sets in \code{\link{bifactor}}. See also the Holzinger-Swineford data set of 301 subjects with 26 variables in \code{\link[psychTools]{holzinger.swineford}}. These data were provided by Keith Widaman. "The Holzinger and Swineford (1939) data have been used as a model data set by many investigators. For example, Harman (1976) used the "24 Psychological Variables" example prominently in his authoritative text on multiple factor analysis, and the data presented under this rubric consisted of 24 of the variables from the Grant-White school (N = 145). Meredith (1964a, 1964b) used several variables from the Holzinger and Swineford study in his work on factorial invariance under selection. Joreskog (1971) based his work on multiple-group confirmatory factor analysis using the Holzinger and Swineford data, subsetting the data into four groups. Rosseel, who developed the "lavaan" package for R , included 9 of the manifest variables from Holzinger and Swineford (1939) as a "resident" data set when one downloads the lavaan package. Several background variables are included in this "resident" data set in addition to 9 of the psychological tests (which are named x1 - x9 in the data set). When analyzing these data, I found the distributions of the variables (means, SDs) did not match the sample statistics from the original article. For example, in the "resident" data set in lavaan, scores on all manifest variables ranged between 0 and 10, sample means varied between 3 and 6, and sample SDs varied between 1.0 and 1.5. In the original data set, scores ranges were rather different across tests, with some variables having scores that ranged between 0 and 20, but other manifest variables having scores ranging from 50 to over 300 - with obvious attendant differences in sample means and SDs." Harman.Burt. Eight ``emotional" variables are taken from Harman (1967, p 164) who in turn adapted them from Burt (1939). They are said be from 172 normal children aged nine to twelve. As pointed out by Harman, this correlation matrix is singular and has squared multiple correlations > 1. Because of this problem, it is a nice test case for various factoring algorithms. (For instance, omega will issue warning messages for fm="minres" or fm="pa" but will fail for fm="ml".) The Eight Physical Variables problem is taken from Harman (1976) and represents the correlations between eight physical variables for 305 girls. The two correlated clusters represent four measures of "lankiness" and then four measures of "stockiness". The original data were selected from 17 variables reported in an unpublished dissertation by Mullen (1939). Variable 6 ("Bitrochanteric diamter") is the distance between the outer points of the hips. The row names match the original Harman paper, the column names have been abbreviated. See also the \code{\link[psychTools]{usaf}} data set for other physical measurements. The \code{\link{fa}} solution for principal axes (fm="pa") matches the reported minres solution, as does the fm="minres". For those interested in teaching examples using various body measurements, see the body data set in the gclus package. The Burt data set probably has a typo in the original correlation matrix. Changing the Sorrow- Tenderness correlation from .87 to .81 makes the correlation positive definite. As pointed out by Jan DeLeeuw, the Burt data set is a subset of 8 variables from the original 11 reported by Burt in 1915. That matrix has the same problem. See \code{\link[psychTools]{burt}}. Other example data sets that are useful demonstrations of factor analysis are the seven bifactor examples in \code{\link{Bechtoldt}} and the 24 ability measures in \code{\link{Harman74.cor}} There are several other Harman examples in the psych package (i.e., \link{Harman.8}) as well as in the dataseta and GPArotation packages. The Harman 24 mental tests problem is in the basic datasets package at \link{Harman74.cor}. Other Harman data sets are 5 socioeconomic variables for 12 census tracts \link{Harman.5} used by John Loehlin as an example for EFA. Another one of the many Harman (1967) data sets is \link{Harman.political}. This contains 8 political variables taken over 147 election areas. The principal factor method with SMCs as communalities match those of table 8.18. The data are used by Dziubian and Shirkey as an example of the Kaiser-Meyer-Olkin test of factor adequacy. } \source{Harman (1967 p 164 and p 244.) H. Harman and W.Jones. (1966) Factor analysis by minimizing residuals (minres). Psychometrika, 31(3):351-368. } \references{Harman, Harry Horace (1967), Modern factor analysis. Chicago, University of Chicago Press. P.Bentler. Covariance structure models for maximal reliability of unit-weighted composites. In Handbook of latent variable and related models, pages 1--17. North Holland, 2007. Burt, C.General and Specific Factors underlying the Primary Emotions. Reports of the British Association for the Advancement of Science, 85th meeting, held in Manchester, September 7-11, 1915. London, John Murray, 1916, p. 694-696 (retrieved from the web at https://www.biodiversitylibrary.org/item/95822#790 } \seealso{ See also the original \code{\link[psychTools]{burt}} data set, the \link{Harman.5} and \link{Harman.political} data sets.} \examples{ data(Harman) cor.plot(Harman.Holzinger) cor.plot(Harman.Burt) smc(Harman.Burt) #note how this produces impossible results f2 <- fa(Harman.8,2, rotate="none") #minres matches Harman and Jones } \keyword{datasets} psych/man/predict.psych.Rd0000644000176200001440000001032213607131005015213 0ustar liggesusers\name{predict.psych} \alias{predict.psych} \title{Prediction function for factor analysis, principal components (pca), bestScales } \description{Finds predicted factor/component scores from a factor analysis or principal components analysis (pca) of data set A predicted to data set B. Predicted factor scores use the weights matrix used to find estimated factor scores, predicted components use the loadings matrix. Scores are either standardized with respect to the prediction sample or based upon the original data. Predicted scores from a bestScales model are based upon the statistics from the original sample. } \usage{ \method{predict}{psych}(object, data,old.data,options=NULL,missing=FALSE,impute="none",...) } \arguments{ \item{object}{the result of a factor analysis, principal components analysis (pca) or bestScales of data set A} \item{data}{Data set B, of the same number of variables as data set A.} \item{old.data}{if specified, the data set B will be standardized in terms of values from the old data. This is probably the preferred option. This is done automatically if object is from \code{\link{bestScales}} } \item{options}{scoring options for bestScales objects ("best.keys","weights","optimal.keys","optimal.weights")} \item{missing}{If missing=FALSE, cases with missing data are given NA scores, otherwise they are given the values based upon the wts x complete data } \item{impute}{Should missing cases be replaced by "means", "medians" or treated as missing ("none" is the default} \item{...}{More options to pass to predictions } } \value{ Predicted factor/components/criteria scores. If predicting from either \code{\link{fa}} or \code{\link{pca}},the scores are based upon standardized items where the standardization is either that of the original data (old.data) or of the prediction set. This latter case can lead to confusion if just a small number of predicted scores are found. If the object is from \code{\link{bestScales}}, unit weighted scales are found (by default) using the best.keys and the predicted scores are then put into the metric of the means and standard deviations of the derivation sample. Other scoring key options may be specified using the "options" parameter. Possible values are best.keys","weights","optimal.keys","optimal.weights". See \code{\link{bestScales}} for details. By default, predicted scores are found by the matrix product of the standardized data with the factor or regression weights. If missing is TRUE, then the predicted scores are the mean of the standardized data x weights for those data points that are not NA. } \author{William Revelle } \note{Thanks to Reinhold Hatzinger for the suggestion and request and to Sarah McDougald for the bestScales prediction.} \seealso{ \code{\link{fa}}, \code{\link{principal}}, \code{\link{bestScales}} } \examples{ set.seed(42) x <- sim.item(12,500) f2 <- fa(x[1:250,],2,scores="regression") # a two factor solution p2 <- principal(x[1:250,],2,scores=TRUE) # a two component solution round(cor(f2$scores,p2$scores),2) #correlate the components and factors from the A set #find the predicted scores (The B set) pf2 <- predict(f2,x[251:500,],x[1:250,]) #use the original data for standardization values pp2 <- predict(p2,x[251:500,],x[1:250,]) #standardized based upon the first set round(cor(pf2,pp2),2) #find the correlations in the B set #test how well these predicted scores match the factor scores from the second set fp2 <- fa(x[251:500,],2,scores=TRUE) round(cor(fp2$scores,pf2),2) pf2.n <- predict(f2,x[251:500,]) #Standardized based upon the new data set round(cor(fp2$scores,pf2.n)) #predict factors of set two from factors of set 1, factor order is arbitrary #note that the signs of the factors in the second set are arbitrary \donttest{ #predictions from bestScales #the derivation sample bs <- bestScales(psychTools::bfi[1:1400,], cs(gender,education,age),folds=10,p.keyed=.5) pred <- predict(bs,psychTools::bfi[1401:2800,]) #The prediction sample cor2(pred,psychTools::bfi[1401:2800,26:28] ) #the validity of the prediction summary(bs) #compare with bestScales cross validations } } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{multivariate} \keyword{ models } psych/DESCRIPTION0000644000176200001440000000401414552221456013113 0ustar liggesusersPackage: psych Version: 2.4.1 Date: 2024-01-14 Title: Procedures for Psychological, Psychometric, and Personality Research Authors@R: person("William", "Revelle", role =c("aut","cre"), email="revelle@northwestern.edu", comment=c(ORCID = "0000-0003-4880-9610") ) Description: A general purpose toolbox developed originally for personality, psychometric theory and experimental psychology. Functions are primarily for multivariate analysis and scale construction using factor analysis, principal component analysis, cluster analysis and reliability analysis, although others provide basic descriptive statistics. Item Response Theory is done using factor analysis of tetrachoric and polychoric correlations. Functions for analyzing data at multiple levels include within and between group statistics, including correlations and factor analysis. Validation and cross validation of scales developed using basic machine learning algorithms are provided, as are functions for simulating and testing particular item and test structures. Several functions serve as a useful front end for structural equation modeling. Graphical displays of path diagrams, including mediation models, factor analysis and structural equation models are created using basic graphics. Some of the functions are written to support a book on psychometric theory as well as publications in personality research. For more information, see the web page. License: GPL (>= 2) Imports: mnormt,parallel,stats,graphics,grDevices,methods,lattice,nlme Suggests: psychTools, GPArotation, lavaan, lme4, Rcsdp, graph, knitr, Rgraphviz LazyData: yes ByteCompile: true VignetteBuilder: knitr URL: https://personality-project.org/r/psych/ https://personality-project.org/r/psych-manual.pdf NeedsCompilation: no Packaged: 2024-01-16 15:32:05 UTC; WR Author: William Revelle [aut, cre] () Maintainer: William Revelle Repository: CRAN Date/Publication: 2024-01-18 13:10:06 UTC psych/build/0000755000176200001440000000000014551520564012506 5ustar liggesuserspsych/build/vignette.rds0000644000176200001440000000034014551520564015042 0ustar liggesusersuQ0,QPNpbЃצi+mpElݙNfқґiH75\,Ldù䬸QQ+C 8«LJ4$]ҨDI_`NX ;,pN\E܍ bIĜaI{MU8m꒏:35p̐etrer Z-R}2ټ1׾l-,5<ɰP;1O8|Vepsych/build/partial.rdb0000644000176200001440000000007514551520546014635 0ustar liggesusersb```b`afb`b1 H020piּb C"%!7psych/tests/0000755000176200001440000000000014551520565012552 5ustar liggesuserspsych/tests/testthat/0000755000176200001440000000000014551520565014412 5ustar liggesuserspsych/tests/testthat/test_ICC.R0000644000176200001440000000177114510070772016173 0ustar liggesusers#Tests for ICC -- does it match the numbers from Shrout and Fleiss sf <- matrix(c( 9, 2, 5, 8, 6, 1, 3, 2, 8, 4, 6, 8, 7, 1, 2, 6, 10, 5, 6, 9, 6, 2, 4, 7),ncol=4,byrow=TRUE) colnames(sf) <- paste("J",1:4,sep="") rownames(sf) <- paste("S",1:6,sep="") sf #example from Shrout and Fleiss (1979) temp <- ICC(sf,lmer=FALSE) test_that("Shrout and Fleiss ICC(1,1)", expect_equal(round(temp$results[1,2],2),.17)) test_that("Shrout and Fleiss ICC(2,1)", expect_equal(round(temp$results[2,2],2),.29)) test_that("Shrout and Fleiss ICC(3,1)", expect_equal(round(temp$results[3,2],2),.71)) #NOW TEST THE K CONDITION test_that("Shrout and Fleiss ICC(1,4)", expect_equal(round(temp$results[4,2],2),.44)) test_that("Shrout and Fleiss ICC(2,4)", expect_equal(round(temp$results[5,2],2),.62)) test_that("Shrout and Fleiss ICC(3,4)", expect_equal(round(temp$results[6,2],2),.91)) # see https://www.r-bloggers.com/2019/11/automated-testing-with-testthat-in-practice/ for helppsych/tests/testthat/test_tetrachoric..r0000644000176200001440000000120314525441755020221 0ustar liggesusers#testthat for tetrachoric #Alsto tests that comorbidity and phi work #data from Kirk, 1973 Q1 = .5 Q2 = .5 t0 <- comorbidity(.5,.2275,.11375) # 0 t4 = comorbidity(.5,.5,.3154950) # .4 t5= comorbidity(.5,.5,.3333333) # .5 t8 <- comorbidity(.5,.5,.39758450) #.8 test_that("tetrachoric matchs Kirk", expect_equivalent(t0$tetra$rho,0)) test_that("tetrachoric matchs Kirk", expect_equivalent(t4$tetra$rho,.4,tolerance=10^-4)) test_that("tetrachoric matchs Kirk", expect_equivalent(t5$tetra$rho,.5,tolerance=10^-5)) test_that("tetrachoric matchs Kirk", expect_equivalent(t8$tetra$rho,.8,tolerance=10^-5)) psych/tests/testthat/test_lmCor.r0000644000176200001440000000315414510363213016701 0ustar liggesusers#Tests for lmCor -- does it match the output from lm? #also a test that the graphics engine works. mod <- (lm(rating ~ complaints + privileges, data = attitude)) mod1 <- lmCor(rating ~ complaints + privileges, data = attitude, std=FALSE, plot=FALSE) #do not standardize test_that("lm and lmCor agree)", expect_equivalent(mod$coefficients,mod1$coefficients)) z.attitude <- data.frame(scale(attitude)) #standardize the data before doing lm mod.z<-lm(rating ~ complaints + privileges, data = z.attitude) #regressions on z scores mod.z1 <- lmCor(rating ~ complaints + privileges, data = attitude, plot=FALSE) #by default we standardize and test_that("lm and lmCor agree for standardized scores)", expect_equivalent(mod.z$coefficients,mod.z1$coefficients)) # the results are the same as the standardized lm R <- cor(attitude) #find the correlations #Do the regression on the correlations #Note that these match the regressions on the standard scores of the data mod.r <- lmCor(rating ~ complaints + privileges, data =R, n.obs=30, plot=FALSE) test_that("lmCor with raw data and and lmCor for correlation matrices agree", expect_equivalent(mod.r$coefficients,mod.z1$coefficients[-1])) #no intercept #now, partial out learning and critical mod.par <- lmCor(rating ~ complaints + privileges - learning - critical, data =R, n.obs=30) #compare with the full regression: mod.full <- lmCor(rating ~ complaints + privileges + learning + critical, data =R, n.obs=30) test_that("lmCor with partial r match lmCor with full R agree", expect_equivalent(mod.full$coefficients[1:2],mod.par$coefficients)) #no intercept psych/tests/testthat/test_corPlot.r0000644000176200001440000000076314524555726017273 0ustar liggesusers#tests for various correlation functions #corPlot R <- corPlot(attitude) r <- cor(attitude) test_that("correlations from CorPlot match cor", expect_equivalent(R,r)) R <- corPlot(ability) r <- cor(ability,use="pairwise") test_that("correlations from CorPlot match cor with missing data", expect_equivalent(R,r)) R <- lowerCor(ability,show=FALSE) r <- cor(ability,use="pairwise") test_that("correlations from lowerCor match cor with missing data", expect_equivalent(R,r)) psych/tests/testthat/test_omega.r0000644000176200001440000000166114510362663016726 0ustar liggesusers#Tests for omega and for factor analysis #this is a test for sim.hierarchical, fa, and omega all in one #Uses the data model from Jensen and Weng (1994) #we simulate their data for a 3 lower order 1 higher factor using sim.hierarchical sim.jen <- sim.hierarchical() #the default is to simulate the Jensen-Weng model om <- omega(sim.jen,plot=FALSE) #do a factor analysis and then a higher level factoring loadings <- om$schmid$sl[,1:5] #from Jensen and Weng jensen <- data.frame(g= c(.72, .63, .54, .56, .48,.40,.42,.35,.28), F1 = c(.3487, .3051,.2615, 0,0,0,0,0,0), F2 = c(0,0,0,.42,.36,.30, 0,0,0), F3 = c(0,0,0,0,0,0,.428,.3570,.2856), h2= c(.64,.49,.36,.49,.36,.25,.36,.25, .16) ) diff <- loadings - jensen sum.diff <- sum(abs(diff)) test_that("omega output matches Jensen-Weng)", expect_equivalent(sum.diff,0,tolerance=.001)) psych/tests/testthat/ReadMe.text0000644000176200001440000000104714510365622016453 0ustar liggesusers#Read me for psych tests #Developed October, 2023 following help and request from Coen Bernard Some tests are based upon published models e.g. test_ICC tests matches with Shrout and Fleiss omega creates data to match Jensen and Weng, and then factors the resulting data see https://www.r-bloggers.com/2019/11/automated-testing-with-testthat-in-practice/ for help dn <- "/Users/WR/Library/CloudStorage/Dropbox/psych/tests/testthat" test_dir(dn) fn <- "/Users/WR/Library/CloudStorage/Dropbox/psych/tests/testthat/test_splitHalf.r" test_file(fn) psych/tests/testthat/test_splitHalf.r0000644000176200001440000000216414510311044017546 0ustar liggesusers#Tests for SplitHalf #First for an even set of items #with an obvious split R1 <- matrix(.36,3,3) R2 <- matrix(.25,3,3) g <- matrix(.16,6,6) R12 <- superMatrix(R1,R2) R <- g + R12 diag(R ) <- 1 dimnames(R) <-list( paste0("V",1:6),paste0("V",1:6)) sp <- splitHalf(R) worst.cov <- sum(R[c(1,2,3),c(4,5,6)]) worst.v1 <- sum(R[c(1,2,3), c(1,2,3)]) worst.v2 <- sum(R[c(4,5,6), c(4,5,6)]) worst.r <- worst.cov/sqrt(worst.v1*worst.v2) split.min <- 2 * worst.r /(1+worst.r ) best.cov <- sum(R[c(1,4,5),c(2,3,6)]) best.v1 <- sum(R[ c(1,4,5), c(1,4,5)]) best.v2 <- sum(R[c(2,3,6), c(2,3,6)]) best.r <- best.cov/sqrt(best.v1*best.v2) split.max <- best.r*2/(1+best.r) test_that("Max split half ", expect_equal(sp$maxrb,split.max,tolerance=.001)) test_that("Min split half", expect_equal(sp$minrb,split.min,tolerance=.001)) test_that("average r )", expect_equal(mean(R[lower.tri(R)]),sp$av.r)) #now find alpha from variances k <- 6 alpha <- (sum(R) -k)*(k/(k-1))/sum(R) test_that("alpha from variances", expect_equal(alpha,sp$alpha)) # see https://www.r-bloggers.com/2019/11/automated-testing-with-testthat-in-practice/ for helppsych/tests/testthat/test_mediate.r0000644000176200001440000001107014511122057017231 0ustar liggesusers#test the mediate function against published results from Hayes () # A simple mediation example is the Tal_Or data set (pmi for Hayes) #The pmi data set from Hayes is available as the Tal_Or data set. mod4 <- mediate(reaction ~ cond + (pmi), data =Tal_Or,n.iter=50) summary(mod4) #traditional regression (c') b.coeff <-c(.5249,.2544, .5064) t.coeff <-c(.9585,.9943,5.2183) test_that("traditional regression (c')", expect_equivalent( b.coeff, mod4$cprime.reg$beta[,1],tolerance=.002)) test_that("traditional regression (c') t ", expect_equivalent( t.coeff, mod4$cprime.reg$t,tolerance=.001)) #total effect reaction (c ) 3.25 .4957 b.coeff <- c( 3.25, .4957) t.coeff <- c( 17.0525, 1.7860) test_that("c path (direct effect-- intercept and slope)", expect_equivalent(mod4$total.reg$beta,b.coeff,tolerance=.001)) test_that("c path (direct effect-- t values)", expect_equivalent(mod4$total.reg$t,t.coeff,tolerance=.001)) #a effect X on M b.coeff <- c( 5.3769 , .4765 ) t.coeff <- c( 33.2222, 2.0218) test_that("a path (direct effect-- intercept and slope)", expect_equivalent(mod4$a.reg$beta,b.coeff,tolerance=.001)) test_that("a path (direct effect-- t values)", expect_equivalent(mod4$a.reg$t,t.coeff,tolerance=.001)) #b effect (M on Y) b.coeff <- c(.5064 ) t.coeff <- c(5.2185) test_that("b path M on Y) -- slope)", expect_equivalent(mod4$b.reg$beta[-1],b.coeff,tolerance=.001)) test_that("a path (direct effect-- t values)", expect_equivalent(mod4$b.reg$t[-1],t.coeff,tolerance=.001)) #ab effect (the indirect effect)) b.coeff <- c(.2413 ) test_that("ab on Y) -- slope)", expect_equivalent(mod4$ab,b.coeff,tolerance=.001)) test_that("a path (direct effect-- t values)", expect_equivalent(mod4$b.reg$t[-1],t.coeff,tolerance=.001)) ################ #Two mediators (from Hayes model 6 (chapter 5)) mod6 <- mediate(reaction ~ cond + (pmi) + (import), data =Tal_Or,n.iter=50) summary(mod6) #note this is the model on page 135 #traditional regression (c') b.coeff <-c(-.1498, .1034,.3965, .3244 ) t.coeff <-c(-.28287, .4324, 4.2645, 4.5857) test_that("traditional regression (c')", expect_equivalent( b.coeff, mod6$cprime.reg$beta[,1],tolerance=.002)) test_that("traditional regression (c') t ", expect_equivalent( t.coeff, mod6$cprime.reg$t,tolerance=.001)) #total effect reaction (c ) b.coeff <- c( 3.25, .4957) t.coeff <- c( 17.0525, 1.7860) test_that("c path (direct effect-- intercept and slope)", expect_equivalent(mod6$total.reg$beta,b.coeff,tolerance=.001)) test_that("c path (direct effect-- t values)", expect_equivalent(mod6$total.reg$t,t.coeff,tolerance=.001)) #a effect X on M b.coeff <- c(5.3769,.47653,3.9077, .6268) t.coeff <- c(33.222,2.0218, 18.3704, 2.0234) test_that("a path (direct effect-- intercept and slope)", expect_equivalent(mod6$a.reg$beta,b.coeff,tolerance=.001)) test_that("a path (direct effect-- t values)", expect_equivalent(mod6$a.reg$t,t.coeff,tolerance=.001)) #b effect (M on Y) b.coeff <- c(.5064 ) t.coeff <- c(5.2185) test_that("b path M on Y) -- slope)", expect_equivalent(mod4$b.reg$beta[-1],b.coeff,tolerance=.001)) test_that("a path (direct effect-- t values)", expect_equivalent(mod4$b.reg$t[-1],t.coeff,tolerance=.001)) #ab effect (the indirect effect)) b.coeff <- c(.3923, .1890, .2033 ) test_that("total ab on Y) -- slope)", expect_equivalent(mod6$ab,b.coeff[1],tolerance=.001)) test_that("each ab on Y) -- slope)", expect_equivalent(mod6$all.ab,b.coeff[2:3],tolerance=.001)) #Moderated mediation is done for the Garcia (Garcia, 2010) data set. # (see Hayes, 2013 for the protest data set #n.iter set to 50 (instead of default of 5000) for speed of example #no mediation, just an interaction #note that we do not zero center to be compatible with Hayes p 226 mod7 <- mediate(liking ~ sexism * prot2 , zero=FALSE,data=Garcia, n.iter = 50, plot=FALSE) beta=c(7.7062, -.4725, -3.7727,.8336) t.coeff = c(7.3750, -2.3184, -3.0084, 3.4224) test_that("Moderated regression with slopes and intercepts of", expect_equivalent(mod7$total.reg$beta,beta, tolerance=.001)) test_that("Moderated regression with t of", expect_equivalent(mod7$total.reg$t,t.coeff, tolerance=.001)) data(GSBE) #The Garcia et al data set (aka GSBE) mod11.4 <- mediate(liking ~ sexism * prot2 + (respappr), data=Garcia, n.iter = 50,zero=FALSE) #to match Hayes summary(mod11.4) #to see this interaction graphically, run the examples in ?Garcia psych/tests/testthat/test_lowercor.r0000644000176200001440000000030114525031452017453 0ustar liggesusers#testthat for lowerCor R <- lowerCor(ability) r <- cor(ability,use="pairwise") test_that("correlations from lowerCor match cor with missing data", expect_equivalent(R,r)) psych/tests/testthat/test_fa.r0000644000176200001440000000260714525441670016227 0ustar liggesusers#Tests for fa #compare with published solutions #and also compare to factanal #Various output from Harman #Harman.Burt page 206 #minres solution to a singular matrix minres <- data.frame(F1= c(.982,.935,.833,.720,.676,.526,.515,.355), F2 = c(.065,-.111,-.550,-.091,.330, .164,.583,-.128), h2 = c(.968, .888,.997,.526,.566,.304,.605,.142)) f2 <- fa(Harman.Burt,2, rotate="none",warnings=FALSE) #throws warnings about a bad matrix diff <- f2$loadings - minres[,1:2] test_that("Minresidual solution to Harman.Burt", expect_equal(max(abs(diff)),0,tolerance=.001)) #communalities h2.diff <- f2$communalities - rowSums(minres[,1:2]^2) #find the communalities based upon the tabled factor loadings test_that("Minres solution to Harman.Burt h2", expect_equal(max(abs(h2.diff)),0,tolerance=.002)) h2.diff1 <- f2$communalities - minres[,3] #use the communalities from the book test_that("Minres solution to Harman.Burt h2 from formula", expect_equal(max(abs(h2.diff1)),0,tolerance=.0025)) #### #compare with factanal f3 <- fa(Thurstone,3, fm="mle",rotate="varimax") f <- factanal(covmat=Thurstone,factors=3,rotation="varimax") test_that("fa loadings match those from factanal", expect_equivalent(f3$loadings,f$loading,tolerance=10^-5)) #further tests are done by testing omega which also involves factoring and rotations psych/vignettes/0000755000176200001440000000000014551520565013420 5ustar liggesuserspsych/vignettes/scoring.Rnw0000644000176200001440000007744114550614671015572 0ustar liggesusers% \VignetteIndexEntry{Scoring scales with psych} % \VignettePackage{psych} % \VignetteKeywords{multivariate} % \VignetteKeyword{models} % \VignetteKeyword{Hplot} %\VignetteDepends{psych} %\documentclass[doc]{apa} %% \VignetteEngine{knitr::knitr} %\VignetteEncoding{UTF-8} \documentclass[11pt]{article} %\documentclass[11pt]{amsart} \usepackage{geometry} % See geometry.pdf to learn the layout options. There are lots. \geometry{letterpaper} % ... or a4paper or a5paper or ... %\geometry{landscape} % Activate for for rotated page geometry \usepackage[parfill]{parskip} % Activate to begin paragraphs with an empty line rather than an indent \usepackage{graphicx} \usepackage{amssymb} \usepackage{epstopdf} \usepackage{mathptmx} \usepackage{helvet} \usepackage{courier} \usepackage{epstopdf} \usepackage{makeidx} % allows index generation \usepackage[authoryear,round]{natbib} \usepackage{gensymb} %\usepackage{longtable} %\usepackage{geometry} \usepackage{amssymb} \usepackage{amsmath} %\DeclareGraphicsRule{.tif}{png}{.png}{`convert #1 `dirname #1`/`basename #1 .tif`.png} %\usepackage{Sweave} %\usepackage{/Volumes/'Macintosh HD'/Library/Frameworks/R.framework/Versions/2.13/Resources/share/texmf/tex/latex/Sweave} %\usepackage[ae]{Rd} %\usepackage[usenames]{color} %\usepackage{setspace} \usepackage{fancyvrb} % to allow us to define Sinput \bibstyle{apacite} \bibliographystyle{apa} %this one plus author year seems to work? %\usepackage{hyperref} \usepackage[colorlinks=true,citecolor=blue]{hyperref} %this makes reference links hyperlinks in pdf! \DeclareGraphicsRule{.tif}{png}{.png}{`convert #1 `dirname #1`/`basename #1 .tif`.png} \usepackage{multicol} % used for the two-column index \usepackage[bottom]{footmisc}% places footnotes at page bottom \let\proglang=\textsf \newcommand{\R}{\proglang{R}} %\newcommand{\pkg}[1]{{\normalfont\fontseries{b}\selectfont #1}} \newcommand{\Rfunction}[1]{{\texttt{#1}}} \newcommand{\fun}[1]{{\texttt{#1}\index{#1}\index{R function!#1}}} \newcommand{\pfun}[1]{{\texttt{#1}\index{#1}\index{R function!#1}\index{R function!psych package!#1}}}\newcommand{\Rc}[1]{{\texttt{#1}}} %R command same as Robject \newcommand{\Robject}[1]{{\texttt{#1}}} \newcommand{\Rpkg}[1]{{\textit{#1}\index{#1}\index{R package!#1}}} %different from pkg - which is better? \newcommand{\iemph}[1]{{\emph{#1}\index{#1}}} \newcommand{\wrc}[1]{\marginpar{\textcolor{blue}{#1}}} %bill's comments \newcommand{\wra}[1]{\textcolor{blue}{#1}} %bill's comments \newcommand{\ve}[1]{{\textbf{#1}}} %trying to get a vector command \usepackage{fancyvrb} %this allows fancy boxes \fvset{fontfamily=courier} \DefineVerbatimEnvironment{Routput}{Verbatim} %{fontsize=\scriptsize, xleftmargin=0.6cm} {fontseries=b,fontsize=\scriptsize, xleftmargin=0.1cm} \DefineVerbatimEnvironment{Soutput}{Verbatim} %{fontsize=\scriptsize, xleftmargin=0.6cm} {fontseries=b,fontsize=\scriptsize, xleftmargin=0.1cm} \DefineVerbatimEnvironment{Binput}{Verbatim} {fontseries=b, fontsize=\scriptsize,frame=single, label=\fbox{lavaan model syntax}, framesep=2mm} %\DefineShortVerb{\!} %%% generates error! %change the definition of Sinput from Sweave \DefineVerbatimEnvironment{Sinput}{Verbatim}{fontseries=b, fontsize=\scriptsize, frame=single, label=\fbox{R code},xleftmargin=0pt, framesep=1mm} \DefineVerbatimEnvironment{Rinput}{Verbatim} %{fontsize=\scriptsize, frame=single, label=\fbox{R code}, framesep=1mm} {fontseries=b, fontsize=\scriptsize, frame=single, label=\fbox{R code},xleftmargin=0pt, framesep=1mm} \DefineVerbatimEnvironment{Link}{Verbatim} {fontseries=b, fontsize=\small, formatcom=\color{darkgreen}, xleftmargin=1.0cm} \DefineVerbatimEnvironment{Toutput}{Verbatim} {fontseries=b,fontsize=\tiny, xleftmargin=0.1cm} \DefineVerbatimEnvironment{rinput}{Verbatim} {fontseries=b, fontsize=\tiny, frame=single, label=\fbox{R code}, framesep=1mm} \newcommand{\citeti}[1]{\begin{tiny}\citep{#1}\end{tiny}} \newcommand{\light}[1]{\textcolor{gray}{#1}} \newcommand{\vect}[1]{\boldsymbol{#1}} \let\vec\vect \makeindex % used for the subject index \title{Using \R{} to score personality scales\footnote{Part of a set of tutorials for the psych package.}} \author{William Revelle\\Northwestern University} %the following works only with apaclass %\affiliation{Northwestern University } %\acknowledgements{\\ contact: William Revelle revelle@northwestern.edu \\ %Version of \today{} %} % % %\shorttitle{Scoring scales} %\rightheader{Scoring scales using \pfun{psych}} %\leftheader{PMC Lab} \begin{document} \maketitle \abstract{The \Rpkg{psych} package \citep{psych} was developed to perform most basic psychometric functions using \R{} \citep{R} A common problem is the need to take a set of items (e.g., a questionnaire) and score one or more scales on that questionnaire. Scores for subsequent analysis, reliabilities and intercorrelations are easily done using the \pfun{scoreItems} function.} \tableofcontents \section{Overview of this and related documents} To do basic and advanced personality and psychological research using \R{} is not as complicated as some think. This is one of a set of ``How To'' to do various things using \R{} \citep{R}, particularly using the \Rpkg{psych} \citep{psych} package. The current list of How To's includes: \begin{enumerate} \item An \href{https://personality-project.org/r/psych/intro.pdf}{introduction} (vignette) of the \Rpkg{psych} package \item An \href{https://personality-project.org/r/psych/overview.pdf}{overview} (vignette) of the \Rpkg{psych} package \item \href{https://personality-project.org/r/psych/HowTo/getting_started.pdf}{Installing} \R{} and some useful packages \item Using \R{} and the \Rpkg{psych} package to find \href{https://personality-project.org/r/psych/HowTo/omega.pdf}{$omega_h$} and $\omega_t$. \item Using \R{} and the \Rpkg{psych} for \href{https://personality-project.org/r/psych/HowTo/factor.pdf}{factor analysis} and principal components analysis. \item Using the \pfun{scoreItems} function to find \href{https://personality-project.org/r/psych/HowTo/scoring.pdf}{scale scores and scale statistics} (this document). \item Using \pfun{mediate} and \pfun{lmCor} to do \href{https://personality-project.org/r/psych/HowTo/mediation.pdf}{mediation, moderation and regression analysis} \end{enumerate} By following these simple guides, you soon will be able to do such things as find scale scores by issuing just five lines of code:\\ \begin{Rinput} library(psych) #necessary whenever you want to run functions in the psych package my.data <- read.clipboard() my.keys <- make.keys(my.data,list(scale1 = c(1,4,5),scale2 = c(2,3,6)) #etc my.scales <- scoreItems(my.keys,my.data) my.scales \end{Rinput} The resulting output will be both graphical and textual. This guide helps the naive \R{} user to issue those three lines. Be careful, for once you start using \R, you will want to do more. Suppose you have given a questionnaire with some items (n) to some participants (N). You would like to create scale scores for each person on k different scales. This may be done using the \Rpkg{psych} package in \R{}. The following assumes that you have installed \R{} and downloaded the \Rpkg{psych} package. \section{Overview for the impatient} Remember, before using \Rpkg{psych} and the \Rpkg{psychTools} packages you must make them active:\\ <>= library(psych) library(psychTools) @ Then \begin{enumerate} \item Enter the data into a spreadsheet (Excel or Numbers) or a text file using a text editor (Word, Pages, BBEdit). The first line of the file should include names for the variables (e.g., Q1, Q2, ... Qn). \item Copy the data to the clipboard (using the normal copy command for your spreadsheet or word processor) or save it as .txt or .csv file. \item Read the data into \R{} using the read.clipboard command. (Depending upon your data file, this might need to be read.clipboard.csv (for comma separated data fields) or read.clipboard.tab (for tab separated data fields). \item Or, if you have a data file already that end in .sav, .text, .txt, .csv, .xpt, .rds, .Rds, .rda, or .RDATA, then just read it in directly using \pfun{read.file} (from \Rpkg{psychTools}).. \item Construct a set of scoring keys for the scales you want to score using the \pfun{make.keys} function. This is simply the item names or numbers that go into each scale. A negative sign implies that the item will be reverse scored. \item Use the \pfun{scoreItems} function to score the scales. \item Use the output for \pfun{scoreItems} for further analysis. \end{enumerate} \section{An example} Suppose we have 12 items for 200 subjects. The items represent 4 different scales: Positive Energetic Arousal (EAp), Negative Energetic Arousal (EAn), Tense Arousal (TAp) and negative Tense Arousal (TAn, also known as being relaxed). These four scales can also be thought of a forming two higher order constructs, Energetic Arousal (EA) and Tense Arousal (TA). EA is just EAp - EAn, and similarly TA is just TAp - TAn. \subsection{Getting the data} There are, of course, many ways to enter data into \R. \subsubsection{Reading from a local file} Reading from a local file using \fun{read.table} is perhaps the most common procedure. You first need to find the file and then read it. This can be done with the \fun{file.choose} and \fun{read.table} functions. \fun{file.choose} opens a search window on your system just like any open file command does. It doesn't actually read the file, it just finds the file. The read command is also necessary. \\ \begin{Rinput} file.name <- file.choose() my.data <- read.table(file.name) \end{Rinput} Even easier is to use the \pfun{read.file} function which combines the \fun{file.chooose} and \fun{read.table} functions into one function. In addition, \pfun{read.file} will read normal text (txt) files, comma separated files (csv), SPSS (sav) files as well as files saved by \R (rds) files. By default, it assumes that the first line of the file has header information (variable names). \pfun{read.file} is included in the \Rpkg{psychTools} package which needs to be installed (once) and made active using the \pfun{library} function. \\ \begin{Rinput} my.data <- read.file() #locate the file to read using your normal system. \end{Rinput} \subsection{Reading from a remote file} To read from a file saved on a remote server, you just need to specify the file name and then read it. By using the \pfun{read.file} function, you can read a variety of file types (e.g., text, txt, csv, sav, rds) from the remote server. e.g., to read the file \pfun{https://personality-project.org/r/psych/HowTo/scoring.tutorial/small.msq.txt} we just ~\ <<>>= file.name <- "https://personality-project.org/r/psych/HowTo/scoring.tutorial/small.msq.txt" my.data <- read.file(file.name) @ \subsubsection{Read from the clipboard} Many users find it more convenient to enter their data in a text editor or spreadsheet program and then just copy and paste into \R{}. The \pfun{read.clipboard} set of functions are perhaps more user friendly. These functions are included in the \Rpkg{psychTools} package: \begin{description} \item [\pfun{read.clipboard}] is the base function for reading data from the clipboard. \item [\pfun{read.clipboard.csv}] for reading text that is comma delimited. \item [\pfun{read.clipboard.tab}] for reading text that is tab delimited (e.g., copied directly from an Excel file). \item [\pfun{read.clipboard.lower}] for reading input of a lower triangular matrix with or without a diagonal. The resulting object is a square matrix. \item [\pfun{read.clipboard.upper}] for reading input of an upper triangular matrix. \item[\pfun{read.clipboard.fwf}] for reading in fixed width fields (some very old data sets) \end{description} For example, given a data set copied to the clipboard from a spreadsheet, just enter the command.\\ \begin{Rinput} my.data <- read.clipboard() #note the parentheses \end{Rinput} This will work if every data field has a value and even missing data are given some values (e.g., NA or -999). If the data were entered in a spreadsheet and the missing values were just empty cells, then the data should be read in as a tab delimited or by using the \pfun{read.clipboard.tab} function. \begin{Rinput} my.tab.data <- read.clipboard.tab() #This is for data from a spreadsheet \end{Rinput} For the case of data in fixed width fields (some old data sets tend to have this format), copy to the clipboard and then specify the width of each field (in the example below, the first variable is 5 columns, the second is 2 columns, the next 5 are 1 column the last 4 are 3 columns). \begin{Rinput} my.data <- read.clipboard.fwf(widths=c(5,2,rep(1,5),rep(3,4)) \end{Rinput} \subsubsection{An example data set} Consider the data stored at \href{https://personality-project.org/r/psych/HowTo/scoring.tutorial/small.msq.txt}{a remote data location}. (This is the same file that we read directly above.) Open the file in your browser and then select all. Read them into the clipboard and go. (These data are selected variables from the first 200 cases from the \pfun{msqR} data set in the \Rpkg{psychTools} package). Once you have read the data, it useful to see how many cases and how many variables you have \Rfunction{dim} and to find some basic descriptive statistics. \\ \begin{Rinput} my.data <- read.clipboard() headTail(my.data) dim(my.data) describe(my.data) \end{Rinput} \begin{Routput} > headTail(my.data) active alert aroused sleepy tired drowsy anxious jittery nervous calm relaxed at.ease 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 0 1 1 1 0 0 0 1 1 1 3 1 0 0 0 1 0 0 0 0 1 2 2 4 1 1 1 1 1 1 1 3 2 1 2 1 ... ... ... ... ... ... ... ... ... ... ... ... ... 197 1 1 0 1 2 1 0 0 0 1 1 1 198 2 2 0 0 1 0 1 0 0 2 3 3 199 1 3 0 1 0 1 0 1 0 3 3 3 200 1 2 0 1 1 0 0 0 0 2 3 3 > dim(my.data) [1] 200 12 \end{Routput} ~\ <<>>= fn <- "https://personality-project.org/r/psych/HowTo/scoring.tutorial/small.msq" my.data <- read.file(fn, filetype="txt") #because the suffix is not a standard one, we need to specify the file type dim(my.data) #same as before headTail(my.data) #same as before describe(my.data) @ \subsection{Reading data from a Qualtrics data set} If you have used Qualtrics to collect your data, you can export the data as a csv data file. Unfortunately, this file is poorly organized and has one too many header lines. You can open the file using a spreadsheet program (e.g. Excel) and then change the line above the data to be item labels (e.g., Q1, Q2, ....). Then select that line and all the lines of data that you want to read, and use the \pfun{read.clipboard.tab} function (see above). \section{Scoring scales: an example} To score particular items on particular scales, we must create a set of \emph{scoring keys}. These simply tell us which items go on which scales. Note that we can have scales with overlapping items. There are several ways of creating keys. Probably the most intuitive is just to make up a list of keys. For example, make up keys to score Energetic Arousal, Activated Arousal, Deactivated Arousal ... We can do this by specifying the names of the items or their location. For demonstration purposes, we do this both ways. ~\ <>= my.keys <- list(EA= c("active","alert","aroused", "-sleepy","-tired", "-drowsy"), TA = c("anxious","jittery","nervous","-calm", "-relaxed", "-at.ease"), EAp = c("active","alert","aroused"), EAn = c("sleepy","tired", "drowsy"), TAp = c("anxious","jittery","nervous"), TAn = c("calm", "relaxed", "at.ease") ) another.keys.list <- list(EA=c(1:3,-4,-5,-6),TA=c(7:9,-10,-11,-12), EAp =1:3,EAn=4:6,TAp =7:9,TAn=10:12) @ \begin{tiny} Earlier versions of \pfun{scoreItems} required a \pfun{keys} matrix created by \pfun{make.keys}. This is no longer necessary. Two things to note. The number of variables is the total number of variables (columns) in the data file. You do not need to include all of these items in the scoring keys, but you need to say how many there are. For the keys, items are scored either +1, -1 or 0 (not scored). Just specify the items to score and their direction. What is actually done internally in the \pfun{scoreItem} function is the keys.list are converted to a matrix of -1s, 0s, and 1s to represent the scoring keys and this multiplied by the data. \end{tiny} % % % %\begin{Rinput} %my.keys <- make.keys(my.data,list(EA=c(1:3,-4,-5,-6),TA=c(7:9,-10,-11,-12), % EAp =1:3,EAn=4:6,TAp =7:9,TAn=10:12)) %\end{Rinput} %These keys are shown in Table~\ref{tab:keys}. % %\begin{table}[htpb]\caption{Scoring keys made by using the \pfun{make.keys} function. } %\begin{center} %\begin{scriptsize} %\begin{tabular} {l r r r r r r } % \multicolumn{ 6 }{l}{ } \cr % \hline Variable & EA & TA & EAp & EAn & TAp & TAn \cr % \hline %active & 1 & 0 & 1 & 0 & 0 & 0 \cr % alert & 1 & 0 & 1 & 0 & 0 & 0 \cr % aroused & 1 & 0 & 1 & 0 & 0 & 0 \cr % sleepy & -1 & 0 & 0 & 1 & 0 & 0 \cr % tired & -1 & 0 & 0 & 1 & 0 & 0 \cr % drowsy & -1 & 0 & 0 & 1 & 0 & 0 \cr % anxious & 0 & 1 & 0 & 0 & 1 & 0 \cr % jittery & 0 & 1 & 0 & 0 & 1 & 0 \cr % nervous & 0 & 1 & 0 & 0 & 1 & 0 \cr % calm & 0 & -1 & 0 & 0 & 0 & 1 \cr % relaxed & 0 & -1 & 0 & 0 & 0 & 1 \cr % at.ease & 0 & -1 & 0 & 0 & 0 & 1 \cr % \hline %\end{tabular} %\end{scriptsize} %\end{center} %\label{tab:keys} %\end{table} % my.scales <- scoreItems(my.keys.list,my.data) Now, we use those keys to score the data using \pfun{scoreItems}:\\ These three commands allow you to score the scales, find various descriptive statistics (such as coefficient $\alpha$), the scale intercorrelations, and also to get the scale scores. <>= my.scales <- scoreItems(my.keys,my.data) my.scales #show the output my.scores <- my.scales$scores #the actual scores are saved in the scores object @ %\begin{Routput} % %> my.scales <- scoreItems(my.keys.list,my.data) %> my.scales #show the output %Call: scoreItems(keys = my.keys, items = my.data) % %(Unstandardized) Alpha: % EA TA EAp EAn TAp TAn %alpha 0.84 0.74 0.72 0.91 0.66 0.8 % %Standard errors of unstandardized Alpha: % EA TA EAp EAn TAp TAn %ASE 0.035 0.044 0.071 0.052 0.077 0.063 % %Average item correlation: % EA TA EAp EAn TAp TAn %average.r 0.46 0.33 0.46 0.76 0.39 0.58 % % Guttman 6* reliability: % EA TA EAp EAn TAp TAn %Lambda.6 0.88 0.79 0.71 0.88 0.61 0.77 % %Signal/Noise based upon av.r : % EA TA EAp EAn TAp TAn %Signal/Noise 5.2 2.9 2.6 9.7 1.9 4.1 % %Scale intercorrelations corrected for attenuation % raw correlations below the diagonal, alpha on the diagonal % corrected correlations above the diagonal: % EA TA EAp EAn TAp TAn %EA 0.8387 -0.30 1.00 -1.06 -0.0045 0.38 %TA -0.2357 0.74 -0.26 0.26 1.0011 -1.17 %EAp 0.7819 -0.19 0.72 -0.59 0.2350 0.45 %EAn -0.9208 0.21 -0.48 0.91 0.1373 -0.26 %TAp -0.0033 0.70 0.16 0.11 0.6574 -0.45 %TAn 0.3100 -0.90 0.35 -0.22 -0.3272 0.80 % % In order to see the item by scale loadings and frequency counts of the data % print with the short option = FALSE % %\end{Routput} Two things to notice about this output is a) the message about how to get more information (item by scale correlations and frequency counts) and b) that the correlation matrix between the six scales has the raw correlations below the diagonal, alpha reliabilities on the diagonal, and correlations adjusted for reliability above the diagonal. Because EAp and EAn are both part of EA, they correlate with the total more than would be expected given their reliability. Hence the impossible values greater than $|1.0 |$. \subsection{Long output}To get the scale correlations corrected for item overlap and scale reliability, we print the object that we found, but ask for long output.\\ <>= print(my.scales,short=FALSE) @ %\begin{Routput} %> print(my.scales,short=FALSE) %Call: scoreItems(keys = my.keys, items = my.data) % %(Unstandardized) Alpha: % EA TA EAp EAn TAp TAn %alpha 0.84 0.74 0.72 0.91 0.66 0.8 % %Standard errors of unstandardized Alpha: % EA TA EAp EAn TAp TAn %ASE 0.035 0.044 0.071 0.052 0.077 0.063 % %Average item correlation: % EA TA EAp EAn TAp TAn %average.r 0.46 0.33 0.46 0.76 0.39 0.58 % % Guttman 6* reliability: % EA TA EAp EAn TAp TAn %Lambda.6 0.88 0.79 0.71 0.88 0.61 0.77 % %Signal/Noise based upon av.r : % EA TA EAp EAn TAp TAn %Signal/Noise 5.2 2.9 2.6 9.7 1.9 4.1 % %Scale intercorrelations corrected for attenuation % raw correlations below the diagonal, alpha on the diagonal % corrected correlations above the diagonal: % EA TA EAp EAn TAp TAn %EA 0.8387 -0.30 1.00 -1.06 -0.0045 0.38 %TA -0.2357 0.74 -0.26 0.26 1.0011 -1.17 %EAp 0.7819 -0.19 0.72 -0.59 0.2350 0.45 %EAn -0.9208 0.21 -0.48 0.91 0.1373 -0.26 %TAp -0.0033 0.70 0.16 0.11 0.6574 -0.45 %TAn 0.3100 -0.90 0.35 -0.22 -0.3272 0.80 % %Item by scale correlations: % corrected for item overlap and scale reliability % EA TA EAp EAn TAp TAn %active 0.64 -0.25 0.78 -0.47 0.10 0.39 %alert 0.58 -0.26 0.63 -0.47 0.08 0.39 %aroused 0.44 0.04 0.62 -0.27 0.36 0.14 %sleepy -0.85 0.22 -0.55 0.89 0.12 -0.23 %tired -0.82 0.29 -0.55 0.85 0.16 -0.30 %drowsy -0.78 0.16 -0.46 0.84 0.09 -0.17 %anxious -0.11 0.34 0.00 0.15 0.50 -0.19 %jittery 0.03 0.48 0.21 0.07 0.62 -0.31 %nervous 0.05 0.53 0.22 0.05 0.68 -0.35 %calm 0.11 -0.65 0.24 -0.02 -0.35 0.68 %relaxed 0.33 -0.71 0.34 -0.28 -0.37 0.76 %at.ease 0.39 -0.72 0.47 -0.29 -0.34 0.79 % %Non missing response frequency for each item % 0 1 2 3 miss %active 0.49 0.35 0.13 0.04 0.01 %alert 0.37 0.46 0.16 0.02 0.02 %aroused 0.70 0.23 0.07 0.01 0.01 %sleepy 0.13 0.30 0.25 0.33 0.01 %tired 0.12 0.23 0.33 0.33 0.01 %drowsy 0.17 0.30 0.27 0.25 0.01 %anxious 0.68 0.25 0.04 0.03 0.50 %jittery 0.69 0.26 0.04 0.01 0.01 %nervous 0.78 0.17 0.04 0.02 0.01 %calm 0.09 0.37 0.33 0.21 0.01 %relaxed 0.09 0.35 0.37 0.20 0.01 %at.ease 0.13 0.38 0.32 0.17 0.01> % \end{Routput} \subsection{Correcting for overlapping items across scales} The \pfun{scoreOverlap} function will correct for item overlap. In the case of overlapping keys, (items being scored on multiple scales), scoreOverlap will adjust for this overlap by replacing the overlapping covariances (which are variances when overlapping) with the corresponding best estimate of an item's ``true" variance using either the average correlation or the smc estimate for that item. This parallels the operation done when finding alpha reliability. This is similar to ideas suggested by \cite{cureton:66} and \cite{bashaw:anderson:67} but uses the smc or the average interitem correlation (default). \begin{Rinput} scales.ov <- scoreOverlap(my.keys,my.data) scales.ov \end{Rinput} \begin{Routput} scales Call: scoreOverlap(keys = my.keys, r = my.data) (Standardized) Alpha: EA TA EAp EAn TAp TAn 0.83 0.76 0.72 0.91 0.73 0.81 (Standardized) G6*: EA TA EAp EAn TAp TAn 0.66 0.61 0.72 0.88 0.69 0.77 Average item correlation: EA TA EAp EAn TAp TAn 0.45 0.34 0.47 0.76 0.48 0.58 Number of items: EA TA EAp EAn TAp TAn 6 6 3 3 3 3 Signal to Noise ratio based upon average r and n EA TA EAp EAn TAp TAn 5.0 3.1 2.6 9.8 2.7 4.1 Scale intercorrelations corrected for item overlap and attenuation adjusted for overlap correlations below the diagonal, alpha on the diagonal corrected correlations above the diagonal: EA TA EAp EAn TAp TAn EA 0.833 -0.211 0.88 -0.893 0.073 0.39 TA -0.168 0.758 -0.12 0.234 0.831 -0.85 EAp 0.684 -0.086 0.72 -0.579 0.286 0.44 EAn -0.776 0.194 -0.47 0.907 0.111 -0.26 TAp 0.057 0.619 0.21 0.091 0.733 -0.42 TAn 0.320 -0.661 0.34 -0.222 -0.320 0.81 In order to see the item by scale loadings and frequency counts of the data print with the short option = FALSE \end{Routput} \subsection{Get the actual scores for analysis.} Although we would probably not look at the raw scores, we can if we want by asking for the scores object which is part of the my.scales output. For printing purposes, we round them to two decimal places for compactness. We just look at first 10 cases.\\ <>= my.scores <- my.scales$scores headTail(round(my.scores,2) ) @ \subsection{The example, continued} Once you have the results, you should probably want to \pfun{describe} them and also show a graphic of the scatterplot using the \pfun{pairs.panels} function (Figure~\ref{fig.splom}). (Note that for the figure, we set the plot character to be '.' so that it makes a cleaner plot.)\\ ~\ <>= describe(my.scores) pairs.panels(my.scores,pch='.') @ ~\ <>= png('splom.png') pairs.panels(my.scores,pch='.') dev.off() @ \begin{figure}[htbp] \begin{center} \includegraphics{splom.png} \caption{A simple scatter plot matrix shows the histograms for the variables on the diagonal, the correlations above the diagonal, and the scatter plots below the diagonal. The best fitting loess regression is shown as the red line.} \label{fig.splom} \end{center} \end{figure} \section{Exploring a real data set} The 12 mood items for 200 subjects were taken from the much larger data set, \pfun{msq} in the \Rpkg{psych} package. That data set has 92 variables for 3896 subjects. We can repeat our analysis of EA and TA on that data set. This is a data set collected over about 10 years at the Personality, Motivation and Cognition laboratory at Northwestern and described by \cite{ari:97} and \cite{rafaeli:rev:06}. \subsection{Conventional reliability and scoring} First we get the data for the items that match our small example. Then we describe the data, and finally, find the 6 scales as we did before. Note that the colnames of our small sample are the items we want to pick from the larger set. Another way to choose some items is to use the \pfun{selectFromKeys} function.\\ <>= select <- colnames(my.data) #or select <- selectFromKeys(my.keys) small.msq <- msq[select] describe(small.msq) @ ~\ <>= msq.scales <- scoreItems(my.keys,small.msq) msq.scales #show the output @ % describe(small.msq) % vars n mean sd median trimmed mad min max range skew kurtosis se %active 1 3890 1.03 0.93 1 0.95 1.48 0 3 3 0.47 -0.76 0.01 %alert 2 3885 1.15 0.91 1 1.09 1.48 0 3 3 0.33 -0.76 0.01 %aroused 3 3890 0.71 0.85 0 0.59 0.00 0 3 3 0.95 -0.04 0.01 %sleepy 4 3880 1.25 1.05 1 1.18 1.48 0 3 3 0.40 -1.04 0.02 %tired 5 3886 1.39 1.04 1 1.36 1.48 0 3 3 0.22 -1.10 0.02 %drowsy 6 3884 1.16 1.03 1 1.08 1.48 0 3 3 0.46 -0.93 0.02 %anxious 7 2047 0.67 0.86 0 0.54 0.00 0 3 3 1.09 0.26 0.02 %jittery 8 3890 0.59 0.80 0 0.45 0.00 0 3 3 1.24 0.81 0.01 %nervous 9 3879 0.35 0.65 0 0.22 0.00 0 3 3 1.93 3.47 0.01 %calm 10 3814 1.55 0.92 2 1.56 1.48 0 3 3 -0.01 -0.83 0.01 %relaxed 11 3889 1.68 0.88 2 1.72 1.48 0 3 3 -0.17 -0.68 0.01 %at.ease 12 3879 1.59 0.92 2 1.61 1.48 0 3 3 -0.09 -0.83 0.01 % % %Call: scoreItems(keys = my.keys.list, items = small.msq) % %(Unstandardized) Alpha: % EA TA EAp EAn TAp TAn %alpha 0.87 0.75 0.81 0.93 0.64 0.8 % %Standard errors of unstandardized Alpha: % EA TA EAp EAn TAp TAn %ASE 0.0071 0.0099 0.014 0.011 0.018 0.014 % %Average item correlation: % EA TA EAp EAn TAp TAn %average.r 0.54 0.34 0.58 0.81 0.37 0.57 % % Guttman 6* reliability: % EA TA EAp EAn TAp TAn %Lambda.6 0.9 0.77 0.76 0.9 0.59 0.74 % %Signal/Noise based upon av.r : % EA TA EAp EAn TAp TAn %Signal/Noise 6.9 3 4.1 13 1.8 4 % %Scale intercorrelations corrected for attenuation % raw correlations below the diagonal, alpha on the diagonal % corrected correlations above the diagonal: % EA TA EAp EAn TAp TAn %EA 0.874 -0.0207 1.004 -1.006 0.218 0.168 %TA -0.017 0.7515 -0.011 0.024 1.096 -1.140 %EAp 0.842 -0.0084 0.806 -0.618 0.360 0.246 %EAn -0.906 0.0197 -0.534 0.927 -0.067 -0.076 %TAp 0.163 0.7590 0.258 -0.052 0.638 -0.512 %TAn 0.141 -0.8837 0.198 -0.065 -0.366 0.800 % % In order to see the item by scale loadings and frequency counts of the data % print with the short option = FALSE % % %\end{Routput} \subsection{Show the correction for overlap} <>= msq.scales.ov <- scoreOverlap(my.keys,small.msq) msq.scales.ov #show the output @ \section{Even more analysis: Factoring, clustering, and more tutorials} Far more analyses could be done with these data, but the basic scale scoring techniques is a start. Download the \href{https://cran.r-project.org/web/packages/psych/vignettes/overview.pdf}{vignette} for using \Rpkg{psych} for even more guidance. On a Mac, this is also available in the vignettes list in the help menu. In addition, look at the examples in the help for \pfun{scoreItems}. For examples and tutorials in how to do \emph{factor analysis} or to find \emph{coefficient omega} see the \href{https://personality-project.org/r/psych/HowTo/factor.pdf}{factor analysis tutorial} or the \href{:https//personality-project.org/r/psych/HowTo/omega.pdf}{omega tutorial}. \section{Session Information} <>= sessionInfo() @ \newpage %\bibliography{all} %\bibliography{../../../all} \begin{thebibliography}{} \bibitem[\protect\astroncite{Bashaw and Anderson~Jr}{1967}]{bashaw:anderson:67} Bashaw, W. and Anderson~Jr, H.~E. (1967). \newblock A correction for replicated error in correlation coefficients. \newblock {\em Psychometrika}, 32(4):435--441. \bibitem[\protect\astroncite{Cureton}{1966}]{cureton:66} Cureton, E. (1966). \newblock Corrected item-test correlations. \newblock {\em Psychometrika}, 31(1):93--96. \bibitem[\protect\astroncite{{R Core Team}}{2023}]{R} {R Core Team} (2023). \newblock {\em R: A Language and Environment for Statistical Computing}. \newblock R Foundation for Statistical Computing, Vienna, Austria. \bibitem[\protect\astroncite{Rafaeli and Revelle}{2006}]{rafaeli:rev:06} Rafaeli, E. and Revelle, W. (2006). \newblock A premature consensus: Are happiness and sadness truly opposite affects? \newblock {\em Motivation and Emotion}, 30(1):1--12. \bibitem[\protect\astroncite{Revelle}{2023}]{psych} Revelle, W. (2023). \newblock {\em psych: Procedures for Personality and Psychological Research}. \newblock Northwestern University, Evanston, https://CRAN.r-project.org/package=psych. \newblock R package version 2.3.5. \bibitem[\protect\astroncite{Revelle and Anderson}{1997}]{ari:97} Revelle, W. and Anderson, K.~J. (1997). \newblock Personality, motivation and cognitive performance. final report to the {Army} {Research} {Institute} on contract {MDA 903-93-K-0008}. \newblock Technical report, Northwestern University:. \end{thebibliography} \printindex \end{document} psych/R/0000755000176200001440000000000014551520565011611 5ustar liggesuserspsych/R/winsor.R0000644000176200001440000000463313124466454013264 0ustar liggesusers"winsor" <- function(x, trim=.2,na.rm=TRUE) { if(is.vector(x) ) { ans <- wins(x,trim=trim,na.rm=na.rm) } else { if (is.matrix(x) | is.data.frame(x)) {ans <- apply(x,2,wins,trim=trim,na.rm=na.rm) } } return(ans) } "winsor.means" <- function(x, trim=.2,na.rm=TRUE) { if(is.vector(x) ) { ans <- win.mean(x,trim=trim,na.rm=na.rm) } else { if (is.matrix(x) | is.data.frame(x)) {ans <- apply(x,2,win.mean,trim=trim,na.rm=na.rm) } } return(ans) } "winsor.mean" <- function(x, trim=.2,na.rm=TRUE) { if(is.vector(x) ) { ans <- win.mean(x,trim=trim,na.rm=na.rm) } else { if (is.matrix(x) | is.data.frame(x)) {ans <- apply(x,2,win.mean,trim=trim,na.rm=na.rm) } } return(ans) } "winsor.var" <- function(x, trim=.2,na.rm=TRUE) { if(is.vector(x) ) { ans <- win.var(x,trim=trim,na.rm=na.rm) } else { if (is.matrix(x) | is.data.frame(x)) {ans <- apply(x,2,win.var,trim=trim,na.rm=na.rm) } } return(ans) } "winsor.sd" <- function(x, trim=.2,na.rm=TRUE) { if(is.vector(x) ) { ans <- sqrt(win.var(x,trim=trim,na.rm=na.rm) )} else { if (is.matrix(x) | is.data.frame(x)) {ans <- apply(x,2,win.var,trim=trim,na.rm=na.rm) ans <- sqrt(ans) } } return(ans) } #added winsor.var and winsor.sd and winsor.mean (to supplement winsor.means) August 28, 2009 following a suggestion by Jim Lemon #corrected January 15, 2009 to use the quantile function rather than sorting. #suggested by Michael Conklin in correspondence with Karl Healey #this preserves the order of the data "wins" <- function(x,trim=.2, na.rm=TRUE) { if ((trim < 0) | (trim>0.5) ) stop("trimming must be reasonable") qtrim <- quantile(x,c(trim,.5, 1-trim),na.rm = na.rm) xbot <- qtrim[1] xtop <- qtrim[3] if(trim<.5) { x[x < xbot] <- xbot x[x > xtop] <- xtop} else {x[!is.na(x)] <- qtrim[2]} return(x) } "win.mean" <- function(x,trim=.2, na.rm=TRUE) { if ((trim < 0) | (trim>0.5) ) stop("trimming must be reasonable") if (trim < .5) { ans <- mean(wins(x,trim =trim,na.rm=na.rm),na.rm=na.rm) return(ans)} else {return(median(x,na.rm=TRUE))} } "win.var" <- function(x,trim=.2, na.rm=TRUE) { if ((trim < 0) | (trim > 0.5) ) {stop("trimming must be reasonable")} if (trim < .5) { ans <- var(wins(x,trim =trim,na.rm=na.rm),na.rm=na.rm) return(ans) } else {return(median(x,na.rm=TRUE)) } }psych/R/simulation.circ.R0000644000176200001440000000231212253363036015030 0ustar liggesusers"simulation.circ" <- function(samplesize=c(100,200,400,800), numberofvariables=c(16,32,48,72)) { ncases=length(samplesize) nvar <- length(numberofvariables) results <- matrix(NaN,ncol=ncases,nrow=nvar*ncases) results.ls <- list() case <- 1 for (ss in 1:ncases) { for (nv in 1:nvar) { circ.data <- circ.sim(nvar=numberofvariables[nv],nsub=samplesize[ss]) sim.data <- circ.sim(nvar=numberofvariables[nv],nsub=samplesize[ss],circum=FALSE) elipse.data <- circ.sim(nvar=numberofvariables[nv],nsub=samplesize[ss],yloading=.4) r.circ<- cor(circ.data) r.sim <- cor(sim.data) r.elipse <- cor(elipse.data) pc.circ <- principal(r.circ,2) pc.sim <- principal(r.sim,2) pc.elipse <- principal(r.elipse,2) case <- case + 1 results.ls[[case]] <- list(numberofvariables[nv],samplesize[ss],circ.tests(pc.circ)[1:4],circ.tests(pc.elipse)[1:4],circ.tests(pc.sim)[1:4]) } } results.mat <- matrix(unlist(results.ls),ncol=14,byrow=TRUE) colnames(results.mat) <- c("nvar","n","c-gap","c-fisher","c-RT","c-VT","e-gap","e-fisher","e-RT","e-VT","s-gap","s-fisher","s-RT","s-VT") results.df <- data.frame(results.mat) return(results.df) }psych/R/error.dots.r0000644000176200001440000003010414371222735014071 0ustar liggesusers#adapted (taken) from dotchart with some minor addition of confidence intervals and to interface with statsBy, describeBy and cohen.d #July 17, 2016 #input is the mean + standard errors, and (optionally, alpha) #August 12, added the ability to find (and save) the stats using describe or describeBy #Modified Oct, 4, 2019 to include cohen.d values #corrected April 27, 2022 to correctly call Item in dictionary "error.dots" <- function (x=NULL,var=NULL, se=NULL, group=NULL,sd=FALSE, effect=NULL, stats=NULL, head = 12, tail = 12, sort=TRUE,decreasing=TRUE,main=NULL, alpha=.05,eyes=FALSE, items=FALSE, min.n = NULL,max.labels =40, labels = NULL, label.width=NULL, select=NULL, groups = NULL, gdata = NULL, cex = par("cex"), pt.cex = cex, pch = 21, gpch = 21, bg = par("bg"), fg=par("fg"), color = par("fg"), gcolor = par("fg"), lcolor = "gray", xlab = NULL, ylab = NULL,xlim=NULL,add=FALSE,order=NULL, ...) { opar <- par("mai", "mar", "cex", "yaxs") on.exit(par(opar)) par(cex = cex, yaxs = "i") #first, see if the data come from a psych object with sd and n or se if(length(class(x)) > 1) { cohen.d <- fa.ci <- NULL #strange fix to R compiler names <- cs(statsBy,describe,describeBy, fa.ci,bestScales,cohen.d, cor.ci,corr.test) value <- inherits(x,names,which=TRUE) # value <- class(x)[2] if(any(value > 1) ) { obj <- names[which(value > 0)]} else {obj <- "other"} } else {obj <- "other"} # if(length(class(x)) > 1 ) {if (class(x)[1] == "psych") {obj <- class(x)[2] des <- NULL switch(obj, statsBy = {if(is.null(min.n)) { if(!is.null(effect)) { #convert means to effect sizes compared to a particular group x$mean[,var] <- ( x$mean[,var] -x$mean[effect,var])/x$sd[effect,var] } se <- x$sd[,var]/sqrt(x$n[,var]) x <- x$mean[,var] } else {se <- x$sd[,var] n.obs <- x$n[,var] x <- x$mean[,var] if(!is.null(effect)) { #convert means to effect sizes compared to a particular group x <- ( x$mean[,var] -x$mean[effect,var])/x$sd[effect,var] } if(sd) {se <- x$sd[,var] } else {se <- se/sqrt(n.obs)} x <- subset(x,n.obs > min.n) se <- subset(se,n.obs > min.n) n.obs <- subset(n.obs, n.obs > min.n) }}, describe = {if(sd) {se <- x$sd} else {se <- x$se} if(is.null(labels)) labels <- rownames(x) x <- x$mean names(x) <- labels }, describeBy = {des <- x if(is.null(xlab)) xlab <- var var <- which(rownames(des[[1]]) == var) x <- se <- rep(NA,length(des)) for(grp in 1:length(x)) { x[grp] <- des[[grp]][["mean"]][var] if(sd) {se[grp] <- des[[grp]][["sd"]][var]} else {se[grp] <- des[[grp]][["se"]][var]} } names(x) <- names(des) if(is.null(xlab)) xlab <- var }, fa.ci ={se = x$cis$sds if(is.null(labels)) labels <-rownames(x$cis$means) x <-x$cis$means }, bestScales = {if(!missing(items)) { se=x$items[[items]][,3,drop=FALSE] browser() rn=rownames(x$items[[items]]) x <- x$items[[items]][,2,drop=FALSE] names(x) <- rn des <- NULL } else {se <- x$stats$se rn <- rownames(x$stats) x <- x$stats$mean names(x) <-rn des <- NULL} }, cohen.d = {des <- x$cohen.d[,"effect"] se <- x$se if(!is.null(x$dict)) { cn <- colnames(x$dict)[which(colnames(x$dict) %in% c("Content","Items","content","item","Item"))] # names <- x$dict %in% c("Content","Items","content","item","Items") names <- x$dict[,cn] # names <- x$dict[,"Content"] } else {names <- rownames(x$cohen.d)} x <- des names(x) <- names sd <- TRUE #use these values for the confidence intervals }, reliability ={ x <- x$splits if (sort) { if(is.null(order)) {ord <- order(x,decreasing=!decreasing) } else {ord<- order} } else {ord <- n.var:1} x <- x[ord] names <- rownames(x) se <- NULL }, cor.ci = {des=x$means se = x$sds names= rownames(x$ci) x <- x$means names(x) <- names if(missing(main)) {main="Confidence intervals of correlations"} if(missing(xlab) ) {xlab="Correlation"} }, corr.test = {des= x$ci$r if(x$sym) {se= x$se[lower.tri(x$se)]} else { se = as.vector(x$se)} names= rownames(x$ci) x <- x$ci$r names(x) <- names if(missing(main)) {main="Confidence intervals of correlations"} if(missing(xlab) ) {xlab="Correlation"} }, other = {} #an empty operator )#end switch if (obj=="other"){ if(is.null(group)) { #the case of just one observation per condition if(is.null(stats)) { if(is.null(dim(x))) {se <- rep(0,length(x)) des <- x labels=NULL } else { des <- describe(x) x <-des$mean if(sd) { se <- des$sd} else {se <- des$se} names(x) <- rownames(des)} } else { #the normal case is to find the means and se x <- stats$mean se <- stats$se names(x) <- rownames(stats) des <- NULL } } else { if(is.null(xlab)) xlab <- var des <- describeBy(x,group=group) x <- se <- rep(NA,length(des)) names(x) <- names(des) var <- which(rownames(des[[1]]) == var) for(grp in 1:length(des)) { x[grp] <- des[[grp]][["mean"]][var] if(sd) { se[grp] <- des[[grp]][["sd"]][var]} else {se[grp] <- des[[grp]][["se"]][var]} }} } n.var <- length(x) # if(!is.null(se) && !sd) {ci <- qnorm((1-alpha/2))*se} else {ci <- NULL} if (sort) { if(is.null(order)) {ord <- order(x,decreasing=!decreasing) } else {ord<- order} } else {ord <- n.var:1} x <- x[ord] se <- se[ord] temp <- temp.se <- rep(NA,min(head+tail,n.var)) if((head+tail) < n.var) { if (head > 0 ){ temp[1:head] <- x[1:head] temp.se[1:head] <- se[1:head] names(temp) <- names(x)[1:head] } if(tail > 0 ) {temp[(head + 1):(head + tail)] <- x[(length(x)-tail+1):length(x)] temp.se[(head + 1):(head + tail)] <- se[(length(x)-tail+1):length(x)] names(temp)[(head + 1):(head + tail)] <- names(x)[(length(x)-tail+1):length(x)] } x <- temp se <- temp.se } if(missing(main)) {if(sd) {main <- "means + standard deviation"} else {main="Confidence Intervals around the mean"}} if(is.null(labels)) labels <- names(x) if(sd) {ci <- se} else {ci <- qnorm((1-alpha/2))*se} # if(!is.null(se)) {ci <- qnorm((1-alpha/2))*se} else {ci <- NULL} if(!is.null(ci) && is.null(xlim)) xlim <- c(min(x - ci,na.rm=TRUE),max(x + ci,na.rm=TRUE)) labels <- substr(labels,1,max.labels) if(eyes) { #get ready to draw catseyes ln <- seq(-3,3,.1) rev <- (length(ln):1) } if (!is.numeric(x)) stop("'x' must be a numeric vector or matrix") n <- length(x) if (is.matrix(x)) { if (is.null(labels)) labels <- rownames(x) if (is.null(labels)) labels <- as.character(1L:nrow(x)) labels <- rep_len(labels, n) if (is.null(groups)) groups <- col(x, as.factor = TRUE) glabels <- levels(groups) } else { if (is.null(labels)) labels <- names(x) glabels <- if (!is.null(groups)) levels(groups) if (!is.vector(x)) { warning("'x' is neither a vector nor a matrix: using as.numeric(x)") x <- as.numeric(x) } } if(!add) plot.new() linch <- if (!is.null(labels)) max(strwidth(labels, "inch"),label.width, na.rm = TRUE) else 0 if (is.null(glabels)) { ginch <- 0 goffset <- 0 } else { ginch <- max(strwidth(glabels, "inch"),label.width, na.rm = TRUE) goffset <- 0.4 } if (!(is.null(labels) && is.null(glabels))) { nmai <- par("mai") nmai[2L] <- nmai[4L] + max(linch + goffset, ginch) + 0.1 par(mai = nmai) } if (is.null(groups)) { o <- 1L:n if(!is.null(select)) o <- o[select] y <- o x <- x[o] if(!is.null(ci)) ci <- ci[o] ylim <- c(0, n + 1) } else { o <- sort.list(as.numeric(groups), decreasing = TRUE) x <- x[o] groups <- groups[o] color <- rep_len(color, length(groups))[o] lcolor <- rep_len(lcolor, length(groups))[o] offset <- cumsum(c(0, diff(as.numeric(groups)) != 0)) y <- 1L:n + 2 * offset ylim <- range(0, y + 2) } plot.window(xlim = xlim, ylim = ylim, log = "") lheight <- par("csi") if (!is.null(labels)) { linch <- max(strwidth(labels, "inch"), na.rm = TRUE) loffset <- (linch + 0.1)/lheight labs <- labels[o] mtext(labs, side = 2, line = loffset, at = y, adj = 0, col = color, las = 2, cex = cex, ...) } abline(h = y, lty = "dotted", col = lcolor) points(x, y, pch = pch, col = color, bg = bg, cex = pt.cex/cex) if(!is.null(ci)) {if(!eyes) { segments(x - ci, y, x+ci, y,bg=bg,col=fg,...) # col = par("fg"), lty = par("lty"), lwd = par("lwd")) } } if (!is.null(groups)) { gpos <- rev(cumsum(rev(tapply(groups, groups, length)) + 2) - 1) ginch <- max(strwidth(glabels, "inch"), na.rm = TRUE) goffset <- (max(linch + 0.2, ginch, na.rm = TRUE) + 0.1)/lheight mtext(glabels, side = 2, line = goffset, at = gpos, adj = 0, col = gcolor, las = 2, cex = cex, ...) if (!is.null(gdata)) { abline(h = gpos, lty = "dotted") points(gdata, gpos, pch = gpch, col = gcolor, bg = bg, cex = pt.cex/cex, ...) } } if(eyes) { for (e in 1:(min(head+tail,n.var))) {catseye(x[e],y[e],ci[e]/qnorm(1-alpha/2),alpha=alpha,density=density) }} if(!add) axis(1) if(!add) box() title(main = main, xlab = xlab, ylab = ylab, ...) result <- list(des =des,order=ord) invisible(result) #report the order if sort } #modified from catseyes in error.bars "catseye" <- function(x,y,ci,alpha,density=density,col=col) { SCALE=.7 ln <- seq(-3,3,.1) rev <- (length(ln):1) norm <- dnorm(ln) # clim <- qnorm(alpha/2) #norm <- dt(ln,n-1) clim <- qnorm(alpha/2) #clim <- ci norm <- c(norm,-norm[rev]) ln <- seq(-3,3,.1) cln <- seq(clim,-clim,.1) cnorm <- dnorm(cln) cnorm <- c(0,cnorm,0,-cnorm,0) #this closes the probability interval # polygon(norm*SCALE*ci+x,c(ln,ln[rev])+y) polygon(c(ln,ln[rev])*ci+x, norm*SCALE+y) # polygon(cnorm*SCALE+x,c(clim,cln,-clim,-cln,clim)*ci+y,density=density,col=col) } psych/R/p.rep.R0000644000176200001440000000315310702323025012744 0ustar liggesusers"p.rep" <- function(p=.05,n=NULL,twotailed=FALSE) { df <- n - 2 if(twotailed) p <- 2*p p.rep <- pnorm(qnorm((1-p))/sqrt(2)) if (!is.null(n)) { t <- -qt(p/2,df) r.equiv <- sqrt(t^2/(t^2 + df)) dprime = 2*t*sqrt(1/df) return(list(p.rep=p.rep,d.prime=dprime,r.equiv=r.equiv)) } else { return(p.rep)} } "p.rep.f" <- function(F,df2,twotailed=FALSE) { p <- pf(F,1,df2,lower.tail=FALSE) dprime = sqrt(4*F/(df2)) if(twotailed) p <- 2*p p.rep <- pnorm(qnorm(1-p)/sqrt(2)) if(twotailed) p <- 2*p r.equiv <- sqrt(F/(F+df2)) return(list(p.rep=p.rep,dprime=dprime,prob=p,r.equiv=r.equiv))} "p.rep.r" <- function(r,n,twotailed=TRUE) { dprime <- 2*r/sqrt(1-r^2) sigmad <- sqrt(4/(n-4)) z <- dprime/sigmad p <- 1- pnorm(z) p.rep <- pnorm(qnorm((1-p))/sqrt(2)) if(twotailed) p <- 2*p return(list(p.rep=p.rep,dprime=dprime,prob=p))} "p.rep.t" <- function(t,df,df2=NULL,twotailed=TRUE) { if (is.null(df2)) { dprime = 2*t/sqrt(df) nc <- 1 } else { n1 <- df+1 n2 <- df2+1 df <- df + df2 nc <- ((n1+n2)/2) / ((2*n1 * n2)/(n1+n2)) #average n /n harmonic dprime <- (2* t /sqrt(df)) * sqrt(nc) } p <- pt(t,df,lower.tail=FALSE) r.equiv <- sqrt(t^2/(t^2 + df)) if(twotailed) p <- 2*p p.rep <- pnorm(qnorm((1-p))/sqrt(2)) return(list(p.rep=p.rep,dprime=dprime,prob=p,r.equiv=r.equiv))} "p.rep.z" <- function(z,n,twotailed=TRUE) { dprime = 2*z*sqrt(1/n) p <- (1-pnorm(z )) r.equiv <- sqrt(z^2/(z^2 + n)) if(twotailed) p <- 2*p p.rep <- pnorm(qnorm((1-p))/sqrt(2)) return(list(p.rep=p.rep,dprime=dprime,prob=p,r.equiv=r.equiv))} psych/R/make.congeneric.R0000644000176200001440000000134211133415545014756 0ustar liggesusers"make.congeneric" <- function(loads = c(0.8, 0.7, 0.6, 0.5),N = 1000, err=NULL, short=TRUE) { n <- length(loads) loading <- matrix(loads, nrow = n) error <- diag(1, nrow = n) if (!is.null(err)) {diag(error) <- err} else { diag(error) <- sqrt(1 - loading^2) } pattern <- cbind(loading, error) colnames(pattern) <- c("theta", paste("e", seq(1:n), sep = "")) rownames(pattern) <- c(paste("V", seq(1:n), sep = "")) model <- pattern %*% t(pattern) latent <- matrix(rnorm(N * (n + 1)), ncol = (n + 1)) observed <- latent %*% t(pattern) colnames(latent) <- c("theta", paste("e", seq(1:n), sep = "")) if (short) {return(model)} else {result <- list(model=model,pattern=pattern,observed=observed,latent=latent) return(result)} }psych/R/mat.plot.R0000644000176200001440000000215113573023177013471 0ustar liggesusers#developed April 24, 2009 #Functionally replaced by corPlot "mat.plot" <- function(r,colors=FALSE, n=10,main=NULL,zlim=c(0,1)){ { .Deprecated("mat.plot",msg="mat.plot is deprecated. Please use the corPlot function.") if(is.null(main)) {main <- "Correlation plot" } if(!is.matrix(r) & (!is.data.frame(r))) {if((length(class(r)) > 1) & (inherits(r, "psych"))) {if(inherits(r,"omega")) {r <- r$schmid$sl nff <- ncol(r) r <- r[,1:(nff-2)]} else {r <- r$loadings} } } r <- as.matrix(r) if(min(dim(r)) < 2) {stop ("You need at least two dimensions to make a meaningful plot")} if(is.null(n)) {n <- dim(r)[2]} nf <- dim(r)[2] nvar <- dim(r)[1] if(is.null(rownames(r))) rownames(r) <- paste("V",1:nvar) if(is.null(colnames(r))) colnames(r) <- paste("V",1:nf) if(is.null(zlim)) {zlim <- range(r)} if(colors) {gr <- topo.colors(n) ord <- n:1 gr <- gr[ord]} else { gr <- grey((n:0)/n)} ord1 <- seq(nvar,1,-1) if(nvar != nf) { r <- t(r) } r <- r[,ord1] image(r,col=gr,axes=FALSE,main=main,zlim=zlim) box() at1 <- (0:(nf-1))/(nf-1) at2 <- (0:(nvar-1)) /(nvar-1) axis(1,at=at1,labels=rownames(r)) axis(2,at=at2,labels=colnames(r)) } }psych/R/fa.parallel.R0000644000176200001440000005323014444460612014115 0ustar liggesusers#1/2/14 switched the n.iter loop to a mclapply loop to allow for multicore parallel processing "fa.parallel" <- function(x,n.obs=NULL,fm="minres",fa="both",nfactors=1,main="Parallel Analysis Scree Plots",n.iter=20,error.bars=FALSE,se.bars=FALSE,SMC=FALSE,ylabel=NULL,show.legend=TRUE,sim=TRUE,quant=.95,cor="cor",use="pairwise",plot=TRUE,correct=.5,sqrt=FALSE) { cl <- match.call() # if(!require(parallel)) {message("The parallel package needs to be installed to run mclapply")} ci <- 1.96 arrow.len <- .05 nsub <- dim(x)[1] nvariables <- dim(x)[2] resample <- TRUE #this is used as a flag for correlation matrices if((isCorrelation(x)) && !sim) {warning("You specified a correlation matrix, but asked to just resample (sim was set to FALSE). This is impossible, so sim is set to TRUE") sim <- TRUE resample <- FALSE} if (!is.null(n.obs)) { nsub <- n.obs rx <- x resample <- FALSE if(dim(x)[1] != dim(x)[2]) {warning("You specified the number of subjects, implying a correlation matrix, but do not have a correlation matrix, correlations found ") # rx <- cor(x,use="pairwise") #add the option to choose the type of correlation, this allows us to do fa.parallel.poly inside fa.parallel switch(cor, cor = {rx <- cor(x,use=use)}, cov = {rx <- cov(x,use=use) covar <- TRUE}, tet = {rx <- tetrachoric(x,correct=correct)$rho}, poly = {rx <- polychoric(x,correct=correct)$rho}, mixed = {rx <- mixedCor(x,use=use,correct=correct)$rho}, Yuleb = {rx <- YuleCor(x,,bonett=TRUE)$rho}, YuleQ = {rx <- YuleCor(x,1)$rho}, YuleY = {rx <- YuleCor(x,.5)$rho } ) if(!sim) {warning("You specified a correlation matrix, but asked to just resample (sim was set to FALSE). This is impossible, so sim is set to TRUE") sim <- TRUE resample <- FALSE} } } else { if (isCorrelation(x)) {warning("It seems as if you are using a correlation matrix, but have not specified the number of cases. The number of subjects is arbitrarily set to be 100 ") rx <- x nsub = 100 n.obs=100 resample <- FALSE } else { switch(cor, cor = {rx <- cor(x,use=use)}, cov = {rx <- cov(x,use=use) covar <- TRUE}, tet = {rx <- tetrachoric(x,correct=correct)$rho}, poly = {rx <- polychoric(x,correct=correct)$rho}, mixed = {rx <- mixedCor(x,use=use,correct=correct)$rho}, Yuleb = {rx <- YuleCor(x,,bonett=TRUE)$rho}, YuleQ = {rx <- YuleCor(x,1)$rho}, YuleY = {rx <- YuleCor(x,.5)$rho } ) } } valuesx <- eigen(rx)$values #these are the PC values if(SMC) {diag(rx) <- smc(rx) fa.valuesx <- eigen(rx)$values} else { fa.valuesx <- fa(rx,nfactors=nfactors,rotate="none", fm=fm,warnings=FALSE)$values} #these are the FA values if(sqrt) {valuesx <- sqrt(valuesx) fa.valuesx[fa.valuesx>0] <- sqrt(fa.valuesx[fa.valuesx >0])} temp <- list(samp =vector("list",n.iter),samp.fa = vector("list",n.iter),sim=vector("list",n.iter),sim.fa=vector("list",n.iter)) #parallel processing starts here - the more cores the better! #however, mixedCor seems to break this # templist <- lapply(1:n.iter,function(XX) { templist <- mclapply(1:n.iter,function(XX) { #at least for now, the errors from mixedCor prevent mclapply if(is.null(n.obs)) { #Sample the data, column wise (to keep the basic distributional properties, but making the correlations 0 (on average) bad <- TRUE while(bad) {sampledata <- matrix(apply(x,2,function(y) sample(y,nsub,replace=TRUE)),ncol=nvariables) #do it column wise colnames(sampledata) <- colnames(x) #this allows mixedCor to work switch(cor, #we can do a number of different types of correlations cor = {C <- cor(sampledata,use=use)}, cov = {C <- cov(sampledata,use=use) covar <- TRUE}, tet = {C <- tetrachoric(sampledata,correct=correct)$rho}, poly = {C <- polychoric(sampledata,correct=correct)$rho}, mixed = {C <- mixedCor(sampledata,use=use,correct=correct)$rho}, Yuleb = {C <- YuleCor(sampledata,,bonett=TRUE)$rho}, YuleQ = {C <- YuleCor(sampledata,1)$rho}, YuleY = {C <- YuleCor(sampledata,.5)$rho } ) bad <- any(is.na(C)) #some (not frequently) correlations will be improper, particularly if sampling from sparse matrices } #Try resampling until we get a correlation matrix that works values.samp <- eigen(C)$values temp[["samp"]] <- values.samp if (fa!= "pc") { if(SMC) {sampler <- C diag(sampler) <- smc(sampler) temp[["samp.fa"]]<- eigen(sampler)$values} else { temp[["samp.fa"]] <- fa(C,fm=fm,nfactors=nfactors, SMC=FALSE,warnings=FALSE)$values } } } if(sim) { simdata=matrix(rnorm(nsub*nvariables),nrow=nsub,ncol=nvariables) #create simulated data based upon normal theory sim.cor <- cor(simdata) #we must use correlations based upon Pearson here, because we are simulating the data temp[["sim"]] <- eigen(sim.cor)$values if (fa!="pc") { if(SMC) { diag(sim.cor) <- smc(sim.cor) temp[["sim.fa"]]<- eigen(sim.cor)$values} else {fa.values.sim <- fa(sim.cor,fm=fm,nfactors=nfactors,rotate="none",SMC=FALSE,warnings=FALSE)$values temp[["sim.fa"]] <- fa.values.sim }}} replicates <- list(samp=temp[["samp"]],samp.fa=temp[["samp.fa"]],sim=temp[["sim"]],sim.fa=temp[["sim.fa"]]) }) #parallelism stops here #now combine the results if(is.null(ylabel)) { ylabel <- switch(fa, #switch implementation suggested by Meik Michalke 3/20/17 pc = "eigen values of principal components", fa = "eigen values of principal factors", both = "eigenvalues of principal components and factor analysis") } values<- t(matrix(unlist(templist),ncol=n.iter)) if(sqrt) {values[values >0] <- sqrt(values[values>0])} #added 12/13/22 values.sim.mean=colMeans(values,na.rm=TRUE) # if(!missing(quant)) {values.ci = apply(values,2,function(x) quantile(x,quant))} else {values.ci <- values.sim.mean} #fixed Sept 22, 2018 values.ci = apply(values,2,function(x) quantile(x,quant)) #always apply quant if(se.bars) {values.sim.se <- apply(values,2,sd,na.rm=TRUE)/sqrt(n.iter)} else {values.sim.se <- apply(values,2,sd,na.rm=TRUE)} ymin <- min(valuesx,values.sim.mean) ymax <- max(valuesx,values.sim.mean) sim.pcr <- sim.far <- NA switch(fa, pc = { if (plot) { plot(valuesx,type="b", main = main,ylab=ylabel ,ylim=c(ymin,ymax),xlab="Component Number",pch=4,col="blue")} if(resample) { sim.pcr <- values.sim.mean[1:nvariables] sim.pcr.ci <- values.ci[1:nvariables] sim.se.pcr <- values.sim.se[1:nvariables] if (plot) { points(sim.pcr,type ="l",lty="dashed",pch=4,col="red")}} else {sim.pcr <- NA sim.se.pc <- NA} if(sim) { if(resample) {sim.pc <- values.sim.mean[(nvariables+1):(2*nvariables)] sim.pc.ci <- values.ci[(nvariables+1):(2*nvariables)] sim.se.pc <- values.sim.se[(nvariables+1):(2*nvariables)] } else {sim.pc <- values.sim.mean[1:nvariables] sim.pc.ci <- values.ci[1:nvariables] sim.se.pc <- values.sim.se[1:nvariables]} if (plot) { points(sim.pc,type ="l",lty="dotted",pch=4,col="red")} pc.test <- which(!(valuesx > sim.pc.ci))[1]-1} else { sim.pc <- NA sim.pc.ci <- NA sim.se.pc <- NA pc.test <- which(!(valuesx > sim.pcr.ci))[1]-1 } fa.test <- NA sim.far <- NA sim.fa <- NA }, fa = { #ylabel <- "eigen values of principal factors" should not be changed if set (reported by Meik Michalke) if (plot) {plot(fa.valuesx,type="b", main = main,ylab=ylabel ,ylim=c(ymin,ymax),xlab="Factor Number",pch=2,col="blue")} sim.se.pc <- NA if(resample) {sim.far <- values.sim.mean[(nvariables+1):(2*nvariables)] sim.far.ci <- values.ci[(nvariables+1):(2*nvariables)] sim.se.far <- values.sim.se[(nvariables+1):(2*nvariables)] if (plot) { points(sim.far,type ="l",lty="dashed",pch=2,col="red")}} if(sim) { if(resample) {sim.fa <- values.sim.mean[(3*nvariables+1):(4*nvariables)] sim.fa.ci <- values.ci[(3*nvariables+1):(4*nvariables)] sim.se.fa <- values.sim.se[(3*nvariables+1):(4*nvariables)] } else { sim.fa <- values.sim.mean[(nvariables+1):(2*nvariables)] sim.fa.ci <- values.sim.mean[(nvariables+1):(2*nvariables)] sim.se.fa <- values.sim.se[(nvariables+1):(2*nvariables)] sim.far <- NA #added May 1, 2016 sim.far.ci <- NA sim.se.far <- NA } if (plot) {points(sim.fa,type ="l",lty="dotted",pch=2,col="red")} fa.test <- which(!(fa.valuesx > sim.fa.ci))[1]-1 } else {sim.fa <- NA fa.test <- which(!(fa.valuesx > sim.far.ci))[1]-1 } sim.pc <- NA sim.pcr <- NA sim.se.pc <- NA pc.test <- NA }, both = { if (plot) {plot(valuesx,type="b", main = main,ylab=ylabel ,ylim=c(ymin,ymax),xlab="Factor/Component Number",pch=4,col="blue") points(fa.valuesx,type="b",pch=2,col="blue")} if(sim) { if(resample) { sim.pcr <- values.sim.mean[1:nvariables] sim.pcr.ci <- values.ci[1:nvariables] sim.se.pcr <- values.sim.se[1:nvariables] sim.far <- values.sim.mean[(nvariables+1):(2*nvariables)] sim.se.far <- values.sim.se[(nvariables+1):(2*nvariables)] sim.far.ci <- values.ci[(nvariables+1):(2*nvariables)] sim.pc <- values.sim.mean[(2*nvariables+1):(3*nvariables)] sim.pc.ci <- values.ci[(2*nvariables+1):(3*nvariables)] sim.se.pc <- values.sim.se[(2*nvariables+1):(3*nvariables)] sim.fa <- values.sim.mean[(3*nvariables+1):(4*nvariables)] sim.fa.ci <- values.ci[(3*nvariables+1):(4*nvariables)] sim.se.fa <- values.sim.se[(3*nvariables+1):(4*nvariables)] pc.test <- which(!(valuesx > sim.pcr.ci))[1]-1 fa.test <- which(!(fa.valuesx > sim.far.ci))[1]-1 } else { #not resampling, just sim sim.pc <- values.sim.mean[1:nvariables] sim.pc.ci <- values.ci[1:nvariables] sim.se.pc <- values.sim.se[1:nvariables] sim.fa <- values.sim.mean[(nvariables+1):(2*nvariables)] sim.fa.ci <- values.ci[(nvariables+1):(2*nvariables)] sim.se.fa <- values.sim.se[(nvariables+1):(2*nvariables)] pc.test <- which(!(valuesx > sim.pc.ci))[1]-1 fa.test <- which(!(fa.valuesx > sim.fa.ci))[1]-1 } if (plot) { points(sim.pc,type ="l",lty="dotted",pch=4,col="red") points(sim.fa,type ="l",lty="dotted",pch=4,col="red") # sim.pcr <- sim.far <- NA @#removed Dec 31, 2016 points(sim.pcr,type ="l",lty="dashed",pch=2,col="red") points(sim.far,type ="l",lty="dashed",pch=2,col="red")} pc.test <- which(!(valuesx > sim.pc.ci))[1]-1 fa.test <- which(!(fa.valuesx > sim.fa.ci))[1]-1 } else { #sim is false sim.pcr <- values.sim.mean[1:nvariables] sim.pcr.ci <- values.ci[1:nvariables] sim.se.pcr <- values.sim.se[1:nvariables] sim.far <- values.sim.mean[(nvariables+1):(2*nvariables)] sim.far.ci <- values.ci[(nvariables+1):(2*nvariables)] sim.se.far <- values.sim.se[(nvariables+1):(2*nvariables)] sim.fa <- NA sim.pc <- NA sim.se.fa <- NA sim.se.pc <- NA pc.test <- which(!(valuesx > sim.pcr.ci))[1]-1 fa.test <- which(!(fa.valuesx > sim.far.ci))[1]-1 } if(resample) { if (plot) {points(sim.pcr,type ="l",lty="dashed",pch=4,col="red") points(sim.far,type ="l",lty="dashed",pch=4,col="red") } } } ) if(error.bars) { if(!any(is.na(sim.pc))) { for (i in 1:length(sim.pc)) { ycen <- sim.pc[i] yse <- sim.se.pc[i] arrows(i,ycen-ci*yse,i,ycen+ci* yse,length=arrow.len, angle = 90, code=3,col = par("fg"), lty = NULL, lwd = par("lwd"), xpd = NULL)} } if(!any(is.na(sim.pcr))) { for (i in 1:length(sim.pcr)) { ycen <- sim.pcr[i] yse <- sim.se.pcr[i] arrows(i,ycen-ci*yse,i,ycen+ci* yse,length=arrow.len, angle = 90, code=3,col = par("fg"), lty = NULL, lwd = par("lwd"), xpd = NULL)} } if(!any(is.na(sim.fa))) { for (i in 1:length(sim.fa)) { ycen <- sim.fa[i] yse <- sim.se.fa[i] arrows(i,ycen-ci*yse,i,ycen+ci* yse,length=arrow.len, angle = 90, code=3,col = par("fg"), lty = NULL, lwd = par("lwd"), xpd = NULL)} } if(!any(is.na(sim.far))) { for (i in 1:length(sim.far)) { ycen <- sim.far[i] yse <- sim.se.far[i] arrows(i,ycen-ci*yse,i,ycen+ci* yse,length=arrow.len, angle = 90, code=3,col = par("fg"), lty = NULL, lwd = par("lwd"), xpd = NULL)} } } if(show.legend && plot) { if(is.null(n.obs)) { #that is, do we have real data or a correlation matrix switch(fa, both = {if(sim) {legend("topright", c(" PC Actual Data", " PC Simulated Data", " PC Resampled Data"," FA Actual Data", " FA Simulated Data", " FA Resampled Data"), col = c("blue","red","red","blue","red","red"),pch=c(4,NA,NA,2,NA,NA), text.col = "green4", lty = c("solid","dotted", "dashed","solid","dotted", "dashed"), merge = TRUE, bg = 'gray90') } else {legend("topright", c(" PC Actual Data", " PC Resampled Data"," FA Actual Data", " FA Resampled Data"), col = c("blue","red","blue","red"),pch=c(4,NA,2,NA,NA), text.col = "green4", lty = c("solid","dashed", "solid","dashed"), merge = TRUE, bg = 'gray90')}}, pc = {if(sim) {legend("topright", c(" PC Actual Data", " PC Simulated Data", " PC Resampled Data"), col = c("blue","red","red","blue","red","red"),pch=c(4,NA,NA,2,NA,NA), text.col = "green4", lty = c("solid","dotted", "dashed","solid","dotted", "dashed"), merge = TRUE, bg = 'gray90')} else { legend("topright", c(" PC Actual Data", " PC Resampled Data"), col = c("blue","red","red","blue","red","red"),pch=c(4,NA,NA,2,NA,NA), text.col = "green4", lty = c("solid", "dashed","solid","dotted", "dashed"), merge = TRUE, bg = 'gray90') } } , fa = {if(sim) {legend("topright", c(" FA Actual Data", " FA Simulated Data", " FA Resampled Data"), col = c("blue","red","red","blue","red","red"),pch=c(4,NA,NA,2,NA,NA), text.col = "green4", lty = c("solid","dotted", "dashed","solid","dotted", "dashed"), merge = TRUE, bg = 'gray90')} else { legend("topright", c(" FA Actual Data", " FA Resampled Data"), col = c("blue","red","red","blue","red","red"),pch=c(4,NA,NA,2,NA,NA), text.col = "green4", lty = c("solid", "dashed","solid","dotted", "dashed"), merge = TRUE, bg = 'gray90') } } ) } else { switch(fa, both= { legend("topright", c("PC Actual Data", " PC Simulated Data","FA Actual Data", " FA Simulated Data"), col = c("blue","red","blue","red"),pch=c(4,NA,2,NA), text.col = "green4", lty = c("solid","dotted","solid","dotted"), merge = TRUE, bg = 'gray90')}, pc= { legend("topright", c("PC Actual Data", " PC Simulated Data"), col = c("blue","red","blue","red"),pch=c(4,NA,2,NA), text.col = "green4", lty = c("solid","dotted","solid","dotted"), merge = TRUE, bg = 'gray90')}, fa = {legend("topright", c("FA Actual Data", " FA Simulated Data"), col = c("blue","red","blue","red"),pch=c(4,NA,2,NA), text.col = "green4", lty = c("solid","dotted","solid","dotted"), merge = TRUE, bg = 'gray90')})} } colnames(values) <- paste0("Sim",1:ncol(values)) if(fa!= "fa" && plot) abline(h=1) #switched to fa not pc 2/27/2 results <- list(fa.values = fa.valuesx,pc.values=valuesx,pc.sim=sim.pc,pc.simr = sim.pcr,fa.sim=sim.fa,fa.simr = sim.far,nfact=fa.test,ncomp=pc.test, Call=cl) if (fa == "pc" ) { colnames(values)[1:nvariables] <- paste0("C",1:nvariables) } else { colnames(values)[1:(2*nvariables)] <- c(paste0("C",1:nvariables),paste0("F",1:nvariables)) if(sim) { if(resample) colnames(values)[(2*nvariables +1):ncol(values)] <- c(paste0("CSim",1:nvariables),paste0("Fsim",1:nvariables)) } results$nfact <- fa.test} results$ncomp <- pc.test results$values <- values cat("Parallel analysis suggests that ") cat("the number of factors = ",fa.test, " and the number of components = ",pc.test,"\n") class(results) <- c("psych","parallel") return(invisible(results)) } #a cut down plotting function "plot_fa.parallel" <- function(x,n.obs,fa,show.legend,error.bars=FALSE,main="Parallel Analysis Scree Plots",...) { if(missing(n.obs)) n.obs <- NULL if(missing(fa)) fa <- "both" if(missing(show.legend)) show.legend <- TRUE if(missing(error.bars)) error.bars <- FALSE ci <- 1.96 arrow.len <- .05 fa.valuesx <- x$fa.values fa.values.sim <- x$fa.sim valuesx <- x$pc.values values.sim <- x$pc.sim if(!is.null(x$fa.simr)) {resample=TRUE } else {resample <- FALSE} #ymax <- max(valuesx,values.sim$mean) #ymin <- min(valuesx,values.sim$mean) ymax <- max(valuesx,values.sim,na.rm=TRUE) ymin <- min(valuesx,values.sim,fa.valuesx,fa.values.sim,na.rm=TRUE) ylabel <- "eigen values of principal factors" if (!is.null(valuesx)) { plot(valuesx,type="b", main = main,ylab=ylabel ,ylim=c(ymin,ymax),xlab="Factor Number",pch=4,col="blue") } points(values.sim,type ="l",lty="dotted",pch=2,col="red") if(error.bars) { for (i in 1:dim(values.sim)[1]) { ycen <- fa.values.sim$mean[i] yse <- fa.values.sim$se[i] arrows(i,ycen-ci*yse,i,ycen+ci* yse,length=arrow.len, angle = 90, code=3,col = par("fg"), lty = NULL, lwd = par("lwd"), xpd = NULL)} } points(fa.values.sim,type ="l",lty="dashed",pch=2,col="red") if(error.bars) { for (i in 1:dim(values.sim)[1]) { ycen <- fa.values.sim[i] yse <- fa.values.sim[i] arrows(i,ycen-ci*yse,i,ycen+ci* yse,length=arrow.len, angle = 90, code=3,col = par("fg"), lty = NULL, lwd = par("lwd"), xpd = NULL)} } if (fa !="fa") points(fa.valuesx,type ="b",lty="solid",pch=2,col="blue") points(fa.values.sim,type ="l",lty="dotted",pch=2,col="red") if(is.null(n.obs)) {points(fa.values.sim,type ="l",lty="dashed",pch=2,col="red")} if(resample) { points(x$pc.simr,type ="l",lty="dashed",pch=4,col="red") points(x$fa.simr,type ="l",lty="dashed",pch=4,col="red")} if(show.legend) {if(resample) { legend("topright", c(" PC Actual Data", " PC Simulated Data", " PC Resampled Data"," FA Actual Data", " FA Simulated Data", " FA Resampled Data"), col = c("blue","red","red","blue","red","red"),pch=c(4,NA,NA,2,NA,NA), text.col = "green4", lty = c("solid","dotted", "dashed","solid","dotted", "dashed"), merge = TRUE, bg = 'gray90') } else { legend("topright", c("PC Actual Data", " PC Simulated Data","FA Actual Data", " FA Simulated Data"), col = c("blue","red","blue","red"),pch=c(4,NA,2,NA), text.col = "green4", lty = c("solid","dotted","solid","dotted"), merge = TRUE, bg = 'gray90') } } abline(h=1) if (fa!="pc") {abline(h=0) } } #modified June 09, 2013 to fix the case for just PC tests #modified October 2, 2013 to sample each column of data separately. #modified March 23, 2014 to check for bad resamples in case of very sparse data #also modified to just resample if desired "test.fa.parallel" <- function(x,n.obs=NULL) { fp <- list() fp[[1]] <- fa.parallel(x,n.obs=n.obs) fp[[2]] <-fa.parallel(x,sim=FALSE,n.obs=n.obs) fp[[3]] <- fa.parallel(x,error.bars=TRUE,n.obs=n.obs) fp[[4]] <- fa.parallel(x,error.bars=TRUE,sim=FALSE,n.obs=n.obs) fp[[5]] <- fa.parallel(x,fa="fa",n.obs=n.obs) fp[[6]] <- fa.parallel(x,fa="fa",sim=FALSE,n.obs=n.obs) fp[[7]] <- fa.parallel(x,fa="fa",error.bars=TRUE,n.obs=n.obs) fp[[8]] <- fa.parallel(x,fa="fa",error.bars=TRUE,sim=FALSE,n.obs=n.obs) fp[[9]] <- fa.parallel(x,fa="pc",n.obs=n.obs) fp[[10]] <- fa.parallel(x,fa="pc",sim=FALSE,n.obs=n.obs) fp[[11]] <- fa.parallel(x,fa="pc",error.bars=TRUE,n.obs=n.obs) fp[[12]] <- fa.parallel(x,fa="pc",error.bars=TRUE,sim=FALSE,n.obs=n.obs) fp[[13]] <- fa.parallel(x,quant=.95,n.obs=n.obs) return(fp) } print_psych.parallel <- function(x,digits=2) { cat("Call: ") print(x$Call) if(!is.null(x$fa.values) & !is.null(x$pc.values) ) { parallel.df <- data.frame(fa=x$fa.values,fa.sim =x$fa.sim,pc= x$pc.values,pc.sim =x$pc.sim) fa.test <- x$nfact pc.test <- x$ncomp cat("Parallel analysis suggests that ") cat("the number of factors = ",fa.test, " and the number of components = ",pc.test,"\n") cat("\n Eigen Values of \n") colnames(parallel.df) <- c("Original factors","Simulated data","Original components", "simulated data")} if(is.na(fa.test) ) fa.test <- 0 if(is.na(pc.test)) pc.test <- 0 if(!any(is.na(parallel.df))) {print(round(parallel.df[1:max(fa.test,pc.test),],digits))} else { if(!is.null(x$fa.values)) {cat("\n eigen values of factors\n") print(round(x$fa.values,digits))} if(!is.null(x$fa.sim)){cat("\n eigen values of simulated factors\n") print(round(x$fa.sim,digits))} if(!is.null(x$pc.values)){cat("\n eigen values of components \n") print(round(x$pc.values,digits))} if(!is.null(x$pc.sim)) {cat("\n eigen values of simulated components\n") print(round(x$pc.sim,digits=digits))} } } psych/R/cluster.fit.R0000644000176200001440000000473113571765330014205 0ustar liggesuserscluster.fit <- function(original,load,clusters,diagonal=FALSE) { Pattern <- TRUE df <- nrow(original) * (ncol(original)-1)/2 sqoriginal <- original*original #squared correlations totaloriginal <- sum(sqoriginal) - diagonal*sum(diag(sqoriginal) ) #sum of squared correlations - the diagonal load <- as.matrix(load) clusters <- as.matrix(clusters) model <- load %*% t(load) #reproduce the correlation matrix by the factor law R= FF' except these are not orthogonal residual <- original-model #find the residual R* = R - FF' sqresid <- residual*residual #square the residuals totalresid <- sum(sqresid)- diagonal * sum(diag(sqresid) ) #sum squared residuals - the main diagonal fit <- 1-totalresid/totaloriginal #fit is 1-sumsquared residuals/sumsquared original (of off diagonal elements) covar <- t(clusters) %*% original %*% clusters #matrix algebra is our friend phi <- cov2cor(covar) phi.inv <- try(solve(phi),TRUE) if(inherits(phi.inv,as.character("try-error"))) {Pattern <- FALSE message("Could not invert cluster intercorrelation matrix, pattern matrix not found") } #can not invert matrix if(Pattern) { pattern <- load %*% phi.inv model2 <- pattern %*% t(load) residual <- original - model2 sqresid <- residual*residual totalresid <- sum(sqresid) -(1-diagonal) * sum(diag(sqresid)) #changed Sept 2, 2012 to make more sense (i.e. don't count diagonal if diagonal is false) patternrmse <- sqrt(totalresid/(2*df)) fit2 <- 1-totalresid/totaloriginal } else {fit2 <- NULL patternrmse <- 0} clusters <- abs(clusters) #why do I do this? model.1 <- (load * clusters) %*% phi %*% t(load*clusters) #because the items are already signed residual <- original - model.1 sqresid <- residual*residual #square the residuals totalresid <- sum(sqresid)- diagonal * sum(diag(sqresid) ) #sum squared residuals - the main diagonal fit.1 <- 1-totalresid/totaloriginal #fit is 1-sumsquared residuals/sumsquared original (of off diagonal elements clusterrmse <- sqrt(totalresid/(2*df)) cluster.fit <- list(clusterfit=fit.1,structurefit=fit,patternfit=fit2,clusterrmse=clusterrmse,patternrmse=patternrmse) } psych/R/irt.1p.R0000644000176200001440000000163410500621756013047 0ustar liggesusers#steps towards an IRT program #we find the difficulties using irt.item.diff.rasch #now estimate the thetas #Then, to find the person parameters, use optimize "irt.1p" <- function(delta,items) { # #the basic one parameter model (aka a Rasch model) irt <- function(x,delta,scores) { fit <- -1*(log(scores/(1+exp(delta-x)) + (1-scores)/(1+exp(x-delta)))) mean(fit,na.rm=TRUE) } # delta<- delta items <-items num <- dim(items)[1] fit <- matrix(NA,num,2) total <- rowMeans(items,na.rm=TRUE) count <- rowSums(!is.na(items)) for (i in 1:num) { if (count[i]>0) {myfit <- optimize(irt,c(-4,4),delta=delta,scores=items[i,]) #how to do an apply? fit[i,1] <- myfit$minimum fit[i,2] <- myfit$objective #fit of optimizing program } else { fit[i,1] <- NA fit[i,2] <- NA } #end if else } #end loop irt.1p <-data.frame(total,theta=fit[,1],fit=fit[,2],count)}psych/R/faRotate.r0000644000176200001440000002530714214733545013550 0ustar liggesusersfaRotations <- function(loadings,r=NULL, rotate="oblimin" , hyper= .15, n.rotations =10,...) { #adapted the multiple rotation technique from Niels Waller in fungible faMain #from fungible randStart <- function(dimension) { qr.Q(qr(matrix(rnorm(dimension^2), dimension, dimension)))} starts <- vector("list", n.rotations) hyperplane <- fit <- complexity <- indetermin <- list() fn <- NULL if(inherits(loadings, "fa")) {fn <- loadings$fn loadings <- loadings$loadings } if(inherits(loadings, "principal")) {fn <- loadings$fn loadings <- loadings$loadings} starts <- lapply(starts, function(x) randStart(dimension = ncol(loadings))) starts[[1]] <- diag(ncol(loadings)) original <- loadings if(!is.null(r)) {r.inv <- solve(r)} else {r.inv <- NULL} for(i in 1:n.rotations) { initial <- starts[[i]] loadings <- original cl <- match.call() gpa <- c("Varimax","quartimax","bentlerQ","bentlerT","geominT","geominQ","targetT","targetQ","TargetT","quartimin","oblimin","simplemax") if (rotate=="varimax" |rotate=="Varimax" | rotate=="quartimax" | rotate =="bentlerT" | rotate =="geominT" | rotate =="targetT" | rotate =="bifactor" | rotate =="TargetT"| rotate =="equamax"| rotate =="varimin"|rotate =="specialT" | rotate =="Promax" | rotate =="promax"| rotate =="cluster" |rotate == "biquartimin" |rotate == "TargetQ" |rotate =="specialQ" ) { Phi <- NULL switch(rotate, #The orthogonal cases for GPArotation + ones developed for psych varimax = {rotated <- stats::varimax(loadings) #varimax is from stats, the others are from GPArotation loadings <- rotated$loadings rot.mat <- rotated$rotmat}, Varimax = {if (!requireNamespace('GPArotation')) {stop("I am sorry, to do this rotation requires the GPArotation package to be installed")} #varimax is from the stats package, Varimax is from GPArotations #rotated <- do.call(rotate,list(loadings,...)) #rotated <- do.call(getFromNamespace(rotate,'GPArotation'),list(loadings,...)) rotated <- GPArotation::Varimax(loadings,Tmat=initial,...) loadings <- rotated$loadings rot.mat <- t(solve(rotated$Th))} , quartimax = {if (!requireNamespace('GPArotation')) {stop("I am sorry, to do this rotation requires the GPArotation package to be installed")} #rotated <- do.call(rotate,list(loadings)) rotated <- GPArotation::quartimax(loadings,Tmat=initial,...) loadings <- rotated$loadings rot.mat <- t(solve(rotated$Th))} , bentlerT = {if (!requireNamespace('GPArotation')) {stop("I am sorry, to do this rotation requires the GPArotation package to be installed")} #rotated <- do.call(rotate,list(loadings,...)) rotated <- GPArotation::bentlerT(loadings,Tmat=initial,...) loadings <- rotated$loadings rot.mat <- t(solve(rotated$Th))} , geominT = {if (!requireNamespace('GPArotation')) {stop("I am sorry, to do this rotation requires the GPArotation package to be installed")} #rotated <- do.call(rotate,list(loadings,...)) rotated <- GPArotation::geominT(loadings,Tmat=initial,...) loadings <- rotated$loadings rot.mat <- t(solve(rotated$Th))} , targetT = {if (!requireNamespace('GPArotation')) {stop("I am sorry, to do this rotation requires the GPArotation package to be installed")} rotated <- GPArotation::targetT(loadings,Tmat=initial,...) loadings <- rotated$loadings rot.mat <- t(solve(rotated$Th))} , bifactor = {rot <- bifactor(loadings,Tmat=initial) loadings <- rot$loadings rot.mat <- t(solve(rot$Th))}, TargetT = {if (!requireNamespace('GPArotation')) {stop("I am sorry, to do this rotation requires the GPArotation package to be installed")} rot <- GPArotation::targetT(loadings,Tmat=initial,...) loadings <- rot$loadings rot.mat <- t(solve(rot$Th))}, equamax = {rot <- equamax(loadings,Tmat=initial,...) loadings <- rot$loadings rot.mat <- t(solve(rot$Th))}, varimin = {rot <- varimin(loadings,Tmat=initial,...) loadings <- rot$loadings rot.mat <- t(solve(rot$Th))}, specialT = {rot <- specialT(loadings,Tmat=initial,...) loadings <- rot$loadings rot.mat <- t(solve(rot$Th))}, Promax = {pro <- Promax(loadings) #Promax without Kaiser normalization loadings <- pro$loadings Phi <- pro$Phi rot.mat <- pro$rotmat}, promax = {#pro <- stats::promax(loadings,...) #from stats pro <- kaiser(loadings,rotate="Promax",...) #calling promax will now do the Kaiser normalization before doing Promax rotation loadings <- pro$loadings rot.mat <- pro$rotmat # ui <- solve(rot.mat) # Phi <- cov2cor(ui %*% t(ui)) Phi <- pro$Phi }, cluster = {loadings <- varimax(loadings,...)$loadings pro <- target.rot(loadings) loadings <- pro$loadings Phi <- pro$Phi rot.mat <- pro$rotmat}, biquartimin = {ob <- biquartimin(loadings,Tmat=initial,...) loadings <- ob$loadings Phi <- ob$Phi rot.mat <- t(solve(ob$Th))}, TargetQ = {ob <- TargetQ(loadings,...) loadings <- ob$loadings Phi <- ob$Phi rot.mat <- t(solve(ob$Th))}, specialQ = {ob <- specialQ(loadings,Tmat=initial,...) loadings <- ob$loadings Phi <- ob$Phi rot.mat <- t(solve(pro$Th))}) } else { #The following oblique cases all use GPArotation if (rotate =="oblimin"| rotate=="quartimin" | rotate== "simplimax" | rotate =="geominQ" | rotate =="bentlerQ" |rotate == "targetQ" ) { if (!requireNamespace('GPArotation')) {warning("I am sorry, to do these rotations requires the GPArotation package to be installed") Phi <- NULL} else { ob <- try(do.call(getFromNamespace(rotate,'GPArotation'),list(loadings,Tmat=initial))) if(inherits(ob,as.character("try-error"))) {warning("The requested transformaton failed, Promax was used instead as an oblique transformation") ob <- Promax(loadings)} loadings <- ob$loadings Phi <- ob$Phi rot.mat <- t(solve(ob$Th))} } else {message("Specified rotation not found, rotate='none' used") Phi<- NULL} } signed <- sign(colSums(loadings)) signed[signed==0] <- 1 loadings <- loadings %*% diag(signed) #flips factors to be in positive direction but loses the colnames if(!is.null(Phi)) {Phi <- diag(signed) %*% Phi %*% diag(signed) } #added October 20, 2009 to correct bug found by Erich Studerus #fit statistics added 3/14/22 if(!is.null(r.inv)) { if(!is.null(Phi)) {indetermin[[i]] <- tr(t(loadings) %*% r.inv %*% loadings %*% Phi) } else { indetermin[[i]] <- tr(t(loadings) %*% r.inv %*% loadings) } } else {indetermin[[i]] <- NA} hyperplane[[i]] <- mean(abs(loadings) < hyper) complexity[[i]] <- mean((apply(loadings,1,function(x) sum(x^2)))^2/apply(loadings,1,function(x)sum(x^4))) fit[[i]]<- tr( var(loadings^2)) } hyperplane<- unlist(hyperplane) fit <- unlist(fit) complexity<- unlist(complexity) indetermin <- unlist(indetermin) stats <- matrix(c(hyperplane,fit,complexity,indetermin),ncol=4) colnames(stats)<- c("hyperplane","fit","complexity","indetermin") best <- which(stats[,"hyperplane"]==max(stats[,"hyperplane"])) if(length(best) > 1) best <- which(stats[best,"complexity"]==min(stats[best,"complexity"])) if(length(best)> 1) which(stats[best,"fit"]==min(stats[best,"fit"])) if(length(best)> 1) best <- best[1] if(rotate =="varimax") {rotate <- "Varimax" warning("Varimax from GPA rotation is used instead of varimax from stats")} Tmat <- starts[[best]] # now rerotate to the best fitted solution # need to consider those rotations that do not use GPArotation gpa <- c("Varimax","quartimax","bentlerQ","bentlerT","geominT","geominQ","targetT","targetQ","TargetT","quartimin","oblimin","simplimax") if(rotate %in% gpa) { ob <- try(do.call(getFromNamespace(rotate,'GPArotation'),list(original,Tmat=Tmat))) if(inherits(ob,as.character("try-error"))) {warning("The requested transformaton failed, Promax was used instead as an oblique transformation") rot.mat<- NULL} else { loadings <- ob$loadings Phi <- ob$Phi rot.mat <- t(solve(ob$Th))} } else { #do the other rotations switch(rotate, bifactor = {rot <- bifactor(loadings,Tmat=Tmat) loadings <- rot$loadings rot.mat <- t(solve(rot$Th))}, equamax = {rot <- equamax(loadings,Tmat=Tmat) loadings <- rot$loadings rot.mat <- t(solve(rot$Th))}, varimin = {rot <- varimin(loadings,Tmat=Tmat) loadings <- rot$loadings rot.mat <- t(solve(rot$Th))}, specialT = {rot <- specialT(loadings,Tmat=Tmat) loadings <- rot$loadings rot.mat <- t(solve(rot$Th))}, Promax = {pro <- Promax(loadings) #Promax without Kaiser normalization loadings <- pro$loadings Phi <- pro$Phi rot.mat <- pro$rotmat}, promax = {#pro <- stats::promax(loadings,...) #from stats pro <- kaiser(loadings,rotate="Promax",...) #calling promax will now do the Kaiser normalization before doing Promax rotation loadings <- pro$loadings rot.mat <- pro$rotmat # ui <- solve(rot.mat) # Phi <- cov2cor(ui %*% t(ui)) Phi <- pro$Phi }, cluster = {loadings <- varimax(loadings,...)$loadings pro <- target.rot(loadings) loadings <- pro$loadings Phi <- pro$Phi rot.mat <- pro$rotmat}, biquartimin = {ob <- biquartimin(loadings,Tmat=Tmat) loadings <- ob$loadings Phi <- ob$Phi rot.mat <- t(solve(ob$Th))}, TargetQ = {ob <- TargetQ(loadings,Tmat=Tmat,...) loadings <- ob$loadings Phi <- ob$Phi rot.mat <- t(solve(ob$Th))}, specialQ = {ob <- specialQ(loadings,Tmat=Tmat) loadings <- ob$loadings Phi <- ob$Phi rot.mat <- t(solve(pro$Th))}) } result <- list(rotation.stats=stats,loadings=loadings,Phi=Phi,rot.mat=rot.mat, fn=fn, Call=cl) class(result)<- c("psych","fa") return(result) } psych/R/omega.diagram.R0000644000176200001440000002011014300214163014403 0ustar liggesusers#factor analysis and sem diagrams #based upon fa.graph with some ideas taken from the diagram and shape packages of Karline Soetaert #version of September 20, 2009 #developed to replace Rgraphviz which is too much of a pain to install #Rgraphviz uses a NEL (Node, Edge) represenation while diagram uses a complete linking matrix #thus, I am trying to combine these two approaches #modified 31/5/14 to allow for drawing factor extension derived omegas #and again 7/2/18 to include omegaDirect output "omega.diagram" <- function(om.results,sl=TRUE,sort=TRUE,labels=NULL,flabels=NULL,cut=.2,gcut=.2,simple=TRUE,errors=FALSE, digits=1,e.size=.1,rsize=.15,side=3,main=NULL,cex=NULL,color.lines=TRUE ,marg=c(.5,.5,1.5,.5),adj=2, ...) { if(color.lines) { colors <- c("black","red")} else {colors <- c("black","black") } Phi <- NULL #the default case if(is.null(cex)) cex <- 1 old.par<- par(mar=marg) #give the window some narrower margins on.exit(par(old.par)) #set them back #Figure out what type of input we are drawing (done July 2, 2018) #fixed for R 4.0.0 December 11, 2019 extend<- FALSE #which kind of input are we drawing? if(length(class(om.results)) > 1) { omegaSem <- omegaDirect <- omega <- NULL #strange fix to R 4,0,0 compiler names <- cs(matrix, extend, omegaSem, omegaDirect, omega) value <- inherits(om.results,names,which=TRUE) # value <- class(x)[2] if(any(value > 1) ) { result <- names[which(value > 0)]} else {result <- "other"} } else {result <- "extend"} #just a raw matrix if(result == "matrix") result <- "extend" #dispatch to the right option switch(result, extend = {extend <- TRUE #class(om.results)[2] <- "omega" factors <- om.results nvar <- num.var <- nrow(factors) num.factors <- ncol(factors) -1 if(sort) {temp <- fa.sort(factors[,-1]) temp2 <- factors[,1,drop=FALSE] #added the drop option November 4, 2018 factors <- cbind(g=temp2[rownames(temp),1],temp)} #added column 1 }, omegaSem = { #did we do an omegaSem or just an omegaFromSem? if(is.null(om.results$omega.efa$cfa.loads)) {cfa.loads <- om.results$cfa.loads} else {cfa.loads <- om.results$omega.efa$cfa.loads} # factors <- as.matrix(om.results$omega.efa$cfa.loads[,2:ncol(om.results$omega.efa$cfa.loads)]) class(cfa.loads) <- c("psych","omegaSem") if(sort) cfa.loads <- fa.sort(cfa.loads) factors <- as.matrix(cfa.loads) gloading <- cfa.loads[,1,drop=FALSE] nvar <- num.var <- nrow(gloading) num.factors <- ncol(factors) -1 sl=TRUE main <- "Omega from SEM" }, omegaDirect = {factors <- om.results$loadings nvar <- num.var <- nrow(factors) num.factors <- ncol(factors) -1 if(sort) {temp <- fa.sort(factors[,-1]) temp2 <- factors[,1] factors <- cbind(g=temp2[rownames(temp)],temp)} }, omega ={ if(extend) class(om.results) <- c("psych","omega") if(sort) om.results <- fa.sort(om.results) #usually sort, but sometimes it is better not to do so if (sl) {factors <- as.matrix(om.results$schmid$sl) if(is.null(main)) {main <- "Omega with Schmid Leiman Transformation" } } else {factors <- as.matrix(om.results$schmid$oblique) if(is.null(main)) {main <- "Hierarchical (multilevel) Structure" } } gloading <- om.results$schmid$gloading nvar <- num.var <- dim(factors)[1] #how many variables? if (sl ) {num.factors <- dim(factors)[2] - 1 - (!extend) *3 } else {num.factors <- dim(factors)[2] } }, other ={warning("I am sorry, I don't know how to diagram this input")} ) if(result !="other") {#skip to the end if we don' know what we are doing #now draw the figure e.size <- e.size * 10/ nvar #this is an arbitrary setting that seems to work #first some basic setup parameters vars <- paste("V",1:num.var,sep="") # if (!is.null(labels)) {vars <- paste(labels)} else{vars <- rownames(factors) } #patched to actually draw the labels! 8/21/22 if (!is.null(labels)) {rownames(factors) <- paste(labels)} else{vars <- rownames(factors) } if(!is.null(flabels)) {fact <- flabels} else { if(sl) {fact <- c("g",paste("F",1:num.factors,"*",sep="")) } else {fact <- c(paste("F",1:num.factors,sep="")) } } # e.g. "g" "F'1" "F2" "F3" colnames(factors)[1:length(fact)] <- fact var.rect <- list() arrows.list <- list() fact.rect <- list() max.len <- max(nchar(rownames(factors)))*rsize cex <- min(cex,40/nvar) xleft <- -max.len/2 #0# xright <- nvar + 1 # or hard code to 3? plot(0,type="n",xlim=c(xleft-max.len,xright+1),ylim=c(1,nvar+1),frame.plot=FALSE,axes=FALSE,ylab="",xlab="",main=main) if (sl) {vloc <- (xright)/2 gloc <- xleft grouploc <-xright start <- 1 end <- num.factors+1} else { vloc <- xleft gloc <- xright grouploc <- (xright)/2 start <- 0 end <- num.factors } for (v in 1:nvar) { var.rect[[v]] <- dia.rect(vloc,nvar-v+1,rownames(factors)[v],xlim=c(0,nvar),ylim=c(0,nvar),cex=cex,draw=FALSE,...) } f.scale <- (nvar+ 1)/(num.factors+1) f.shift <- nvar/num.factors for (f in 1:num.factors) { fact.rect[[f]] <- dia.ellipse(grouploc,(num.factors+1-f)*f.scale,colnames(factors)[f+start],xlim=c(0,nvar),ylim=c(0,nvar),e.size=e.size,draw=TRUE,...) for (v in 1:nvar) { if (abs(factors[v,f+start]) > cut) {d.arrow <- dia.arrow(from=fact.rect[[f]],to=var.rect[[v]]$right,col=colors[((sign(factors[v,f+start])<0) +1)],lty=((sign(factors[v,f+start])<0)+1),labels=round(factors[v,f+start],digits),adj=f %% adj +1,draw=FALSE) if(!is.null(d.arrow) )arrows.list <- c(arrows.list,d.arrow) } } } g.ellipse <- dia.ellipse(gloc,(num.var+1)/2,"g",xlim=c(0,nvar),ylim=c(0,nvar),e.size=e.size,...) if(!sl) { for (f in 1:num.factors) { d.arrow <- dia.arrow(from=g.ellipse,to=fact.rect[[f]],col=colors[((sign(gloading[f])<0) +1)],lty=((sign(gloading[f])<0) +1),labels=round(gloading[f],digits),adj=f %% adj +1,draw=FALSE) arrows.list <- c(arrows.list,d.arrow) } } else { for (i in 1:nvar) { if(abs(factors[i,1]) > gcut) { d.arrow <- dia.arrow(from=g.ellipse,to=var.rect[[i]]$left,col=colors[((sign(factors[i,1])<0) +1)],lty=((sign(factors[i,1])<0)+1),labels=round(factors[i,1],digits),adj=1,draw=FALSE)} if(!is.null(d.arrow) ) arrows.list <- c(arrows.list,d.arrow) } } if (errors) {for (v in 1:nvar) { dia.self(location=var.rect[[v]],scale=.5,side=side) } } } #end of normal case tv <- matrix(unlist(var.rect),nrow=nvar,byrow=TRUE) all.rects.x <- tv[,5] #the center of the figure all.rects.y <- tv[,2] all.rects.names <- rownames(factors) dia.rect(all.rects.x, all.rects.y,all.rects.names) # multi.rect(var.rect,...) #draw the factors not implemented fv <- matrix(unlist(fact.rect),nrow=num.factors,byrow=TRUE) all.rects.x <- fv[,5] #the center of the figure all.rects.y <- fv[,6] all.rects.names <- colnames(factors)[2:(num.factors+1)] # dia.ellipse1(all.rects.x, all.rects.y,all.rects.names) # now show all the loadings as arrows # tv <- matrix(unlist(arrows.list),byrow=TRUE,ncol=20) # cname<- colnames(tv) # tv <- data.frame(tv) # tv[,c(1:18,20)] <- nchar2numeric(tv[,c(1:18,20)]) # colnames(tv) <- cname # text(tv[,1],tv[,2],tv[,3],cex=tv[,4]) # len1 <- as.numeric(tv[1,9]) # len2 <- as.numeric(tv[1,16]) # arrows(x0=tv[,5],y0=tv[,6],x1=tv[,7],y1=tv[,8],length=len1,angle=30,code=1,col=tv[,19],lty=tv[,20],...) # arrows(x0=tv[,12],y0=tv[,13],x1=tv[,14],y1=tv[,15],length=len2,angle=30,code=2,col=tv[,19],lty=tv[,29],...) # multi.arrow(arrows.list,...) } psych/R/scoreOverlap.r0000644000176200001440000002135314550350464014442 0ustar liggesusers"scoreOverlap" <- function(keys,r,correct=TRUE,SMC=TRUE,av.r=TRUE,item.smc=NULL,impute=TRUE,select=TRUE,scores=FALSE, min=NULL,max=NULL) { #function to score clusters according to the key matrix, correcting for item overlap #minor fix on 01/11/24 to handle the case of one key tol=sqrt(.Machine$double.eps) #machine accuracy cl <- match.call() bad <- FALSE key.list <- keys #we need to keep this as a list for later if(is.list(keys) & (!is.data.frame(keys))) { if (select) { select <- selectFromKeyslist(colnames(r),keys) # select <- sub("-","",unlist(keys)) #added April 7, 2017 select <- select[!duplicated(select)] } else {select <- 1:ncol(r) } if (!isCorrelation(r,na.rm=TRUE)) {if(scores) {items <- r[,select]} else {items <- NULL} #save the data for scoring r <- cor(r[,select],use="pairwise")} else {r <- r[select,select]} keys <- make.keys(r,keys)} #added 9/9/16 (and then modified March 4, 2017 if(!is.matrix(keys)) keys <- as.matrix(keys) #keys are sometimes a data frame - must be a matrix if ((dim(r)[1] != dim(r)[2]) ) {r <- cor(r,use="pairwise")} if(any(abs(r[!is.na(r)]) > 1)) warning("Something is seriously wrong with the correlation matrix, some correlations had absolute values > 1! Please check your data.") if(any(is.na(r))) { # SMC=FALSE # warning("Missing values in the correlation matrix do not allow for SMC's to be found") bad <- TRUE} if(SMC && is.null(item.smc)) {item.smc <- smc(r)} else { diag(r) <- NA item.smc <- apply(r,1,function(x) max(abs(x),na.rm=TRUE)) item.smc[is.infinite(item.smc) ] <- 1 diag(r) <- 1} if(all(item.smc ==1)) SMC <- FALSE if(!bad) {covar <- t(keys) %*% r %*% keys} else #matrix algebra is our friend {#covar<- apply(keys,2,function(x) colSums(apply(keys,2,function(x) colSums(r*x,na.rm=TRUE))*x,na.rm=TRUE)) #matrix multiplication without matrices! covar <- score.na(keys,r,cor=FALSE) } var <- diag(covar) #these are the scale variances n.keys <- ncol(keys) item.var <- item.smc raw.r <- cov2cor(covar) key.var <- diag(t(keys) %*% keys) key.smc <- t(keys) %*% item.smc key.alpha <- ((var-key.var)/var)*(key.var/(key.var-1)) key.lambda6 <- (var - key.var + key.smc)/var key.alpha[is.nan(key.alpha)] <- 1 #if only 1 variable to the cluster, then alpha is undefined key.alpha[!is.finite(key.alpha)] <- 1 key.av.r <- key.alpha/(key.var - key.alpha*(key.var-1)) #alpha 1 = average r colnames(raw.r) <- rownames(raw.r) <- colnames(keys) names(key.lambda6) <- colnames(keys) key.lambda6 <- drop(key.lambda6) n.keys <- ncol(keys) sn <- key.av.r * key.var/(1-key.av.r) if(!bad) { item.cov <- t(keys) %*% r #the normal case is to have all correlations raw.cov <- item.cov %*% keys} else { item.cov <- apply(keys,2,function(x) colSums(r*x,na.rm=TRUE)) #some correlations are NA have to adjust raw.cov <- apply(keys,2,function(x) colSums(item.cov*x,na.rm=TRUE)) item.cov <- t(item.cov) } adj.cov <- raw.cov #now adjust them med.r <- rep(NA, n.keys) for (i in 1:(n.keys)) { temp <- keys[,i][abs(keys[,i]) > 0] temp <- diag(temp,nrow=length(temp)) small.r <- r[abs(keys[,i])>0,abs(keys[,i])>0] #small.r <- temp %*% small.r %*% temp #this is just flipping the signs, but will not work with missing data if(NROW(temp) > 1) small.r <- apply(temp,2, function(x) colSums(apply(temp,2, function(x) colSums(small.r * x,na.rm=TRUE))*x,na.rm=TRUE)) med.r[i] <- median(small.r[lower.tri(small.r)],na.rm=TRUE) for (j in 1:i) { if(av.r) { adj.cov[i,j] <- adj.cov[j,i]<- raw.cov[i,j] - sum(keys[,i] * keys[,j] ) + sum(keys[,i] * keys[,j] * sqrt(key.av.r[i] * key.av.r[j])) } else { adj.cov[i,j] <- adj.cov[j,i] <- raw.cov[i,j] - sum(keys[,i] * keys[,j] )+ sum( keys[,i] * keys[,j] * sqrt(item.smc[i]* abs(keys[,i])*item.smc[j]*abs(keys[,j]) )) } } } scale.var <- diag(raw.cov) diag(adj.cov) <- diag(raw.cov) adj.r <- cov2cor(adj.cov) #this is the overlap adjusted correlations #find the MIMS values (Average within cluster/scale items) scale.size <- outer(key.var,key.var) MIMS <- adj.cov/scale.size diag(MIMS)<- key.av.r #adjust the item.cov for item overlap #we do this by replacing the diagonal of the r matrix with the item.var (probably an smc, perhaps a maximum value) diag(r) <- item.var if(!bad) { item.cov <- t(keys) %*% r #the normal case is to have all correlations } else { item.cov <- t(apply(keys,2,function(x) colMeans(r*x,na.rm=TRUE)) *NROW(keys)) #some correlations are NA } if(n.keys > 1) { item.cor <- sqrt(diag(1/(key.lambda6*scale.var))) %*% (item.cov) # %*% diag(1/sqrt(item.var)) rownames(item.cor) <- colnames(keys) colnames(item.cor) <- colnames(r) } else { item.cor <- r %*% keys /sqrt(key.lambda6*scale.var) } item.cor <- t(item.cor) names(med.r) <- colnames(keys) #find the Multi-Item Multi Trait item x scale correlations #this only makes sense if n.keys > 1 MIMT <- matrix(NA,n.keys,n.keys) for (i in 1:(n.keys)) { temp <- keys[,i][abs(keys[,i]) > 0] if(n.keys > 1){ flip.item <- temp * item.cor[names(temp),,drop=FALSE]} else {flip.item <- temp * item.cor[names(temp)]} if(length(names(temp)) > 1) { if(n.keys >1) {MIMT[i,] <- colMeans(item.cor[names(temp),])}} else {MIMT[i,] <- flip.item} } colnames(MIMT) <- rownames(MIMT) <- colnames(keys) good <- scale_quality(adj.r,item.cor,key.list) names(good) <- names(key.list) if(scores) { abskeys <- abs(keys) num.item <- diag(t(abskeys) %*% abskeys) #how many items in each scale num.ob.item <- num.item #will be adjusted in case of impute = FALSE n.subjects <- dim(items)[1] item.means <- colMeans(items,na.rm=TRUE) if (is.null(min)) {min <- min(items,na.rm=TRUE)} if (is.null(max)) {max <- max(items,na.rm=TRUE)} if(impute !="none") { miss <- which(is.na(items),arr.ind=TRUE) if(impute=="mean") { item.means <- colMeans(items,na.rm=TRUE) #replace missing values with means items[miss]<- item.means[miss[,2]]} else { item.med <- apply(items,2,median,na.rm=TRUE) #replace missing with medians items[miss]<- item.med[miss[,2]]} #this only works if items is a matrix scores <- items %*% keys #this actually does all the work but doesn't handle missing values C <- cov(items,use="pairwise") cov.scales <- cov(scores,use="pairwise") #and total scale variance cov.scales2 <- diag(t(abskeys) %*% C^2 %*% abskeys) # sum(C^2) for finding ase } else { #handle the case of missing data without imputation scores <- matrix(NaN,ncol=n.keys,nrow=n.subjects) #we could try to parallelize this next loop for (scale in 1:n.keys) { pos.item <- items[,which(keys[,scale] > 0)] neg.item <- items[,which(keys[,scale] < 0)] neg.item <- max + min - neg.item sub.item <- cbind(pos.item,neg.item) scores[,scale] <- rowMeans(sub.item,na.rm=TRUE) rs <- rowSums(!is.na(sub.item)) num.ob.item[scale] <- mean(rs[rs>0]) #added Sept 15, 2011 # num.ob.item[scale] <- mean(rowSums(!is.na(sub.item))) # dropped } # end of scale loop # we now need to treat the data as if we had done correlations at input } colnames(scores)<- names(key.list) } #end of if scores loop if (correct) {cluster.corrected <- correct.cor(adj.r,t(key.alpha)) result <- list(cor=adj.r,sd=sqrt(var),corrected= cluster.corrected,alpha=key.alpha,av.r = key.av.r,size=key.var,sn=sn,G6 =key.lambda6, item.cor=item.cor, med.r=med.r,quality=good, MIMS=MIMS,MIMT=MIMT,scores=scores,Call=cl) } #correct for attenuation else { result <- list(cor=adj.r,sd=sqrt(var),alpha=key.alpha, av.r = key.av.r, size=key.var,sn=sn,G6 =key.lambda6, item.cor=item.cor, med.r=med.r, scores=scores, Call=cl)} class(result) <- c ("psych", "overlap") return(result)} #modified 01/11/15 to find r if not a square matrix #modifed 03/05/15 to do pseudo matrix multiplication in case of missing data scale_quality = function(phi,r,keys) { #switched from . to _ 6/20/23 nvar <- NROW(r) nscale <- NCOL(r) good <- rep(0,length(keys)) best <- apply(abs(r),1, which.max) for(i in 1:length(keys)) { select <- selectFromKeys(keys[i]) good [i] <- sum(best[select] == i) good[i] <- good[i]/length(select) } return(good) } # scale.quality = function(n.obs,phi,r,keys) { # nvar <- NROW(r) # nscale <- NCOL(r) # best <- good <- rep(0,length(keys)) # best <- apply(abs(r),1, which.max) # for(i in 1:length(keys)) { # select <- selectFromKeys(keys[i]) # good [i] <- sum(best[select] == i) # } # return(good) # } psych/R/test.all.R0000644000176200001440000000216513155340421013454 0ustar liggesuserstest.all <- function(pl,package="psych",dependencies = c("Depends", "Imports", "LinkingTo"),find=FALSE,skip=NULL) { if (find) { pl <-tools::dependsOnPkgs(package,dependencies=dependencies) if(!is.null(skip) && skip %in% pl) {pl <- pl[-which(skip ==pl)]} } np <- length(pl) if(np > 0 ) { for(i in 1:np) { p <- pl[i] test <- require(p,character.only=TRUE) if(!test) {cat("\nCould not find package ",p, "\n") next } cat(paste("\nNow testing package " ,p )) ob <- paste("package",p,sep=":") ol <- objects(ob) nf <- length(ol) options("example.ask"=FALSE) for(i in 1:nf) { fn <- as.character(ol[[i]]) example(topic=fn,package=p,character.only=TRUE,ask=FALSE) } detach(ob,character.only=TRUE) } } else {cat("\nNo dependencies for package ", package) } } #tools::package_dependencies(reverse = TRUE) #lists all the reverse dependencies #tools::check_packages_in_dir(dir,reverse = list()) #might check them, unclear #rd <-reverse_dependencies_with_maintainers("psych") #library(devtools) #does not seem to with development version #dep <- revdep("psych") #revdep_check("psych") psych/R/residuals.psych.R0000644000176200001440000000606413574325470015064 0ustar liggesusers"residuals.psych" <- function(object,diag=TRUE,...) { result <- NULL if(length(class(object)) > 1) { obnames <- cs( fa, principal, omega, irt.fa, esem, extension) extension <- esem <- NA value <- inherits(object, obnames, which=TRUE) if (any(value > 1)) {value <- obnames[which(value >0)]} else {value <- "none"} } else {value <- "none"} switch(value, fa = {residual <- object$residual}, principal = {residual <- object$residual}, omega = {residual <- object$stats$residual}, irt.fa ={residual <- object$fa$residual}, esem = {residual <- object$residual}, extension = {residual <- object$resid}, none = {stop("No appropriate residual found")} ) if(!diag) diag(residual) <- NA class(residual) <- c("psych","residuals") return(residual) } #added January 30, 2012 "resid.psych" <- function(object,diag=TRUE,...) { residuals(object,diag=diag,...) } #added Feb 4, 2012 #modified April 15, 2016 to add chisquare and histograms as well as to identify variables #modified June 23, 2016 to make the names on the right hand side have pos=2 "plot.residuals" <- function(x,main,type=c("qq","chi","hist","cor"),std,bad=4,numbers=TRUE,upper=FALSE,diag=FALSE,...) { if(missing(type)) type <- "qq" nr <- nrow(x) nc <- ncol(x) if(!is.null(rownames(x))) {rname <- rownames(x)} else {rname <- paste0("V",1:nr)} diag(x) <- NA switch(type, hist = { if(missing(std)) std <- FALSE x <- x[lower.tri(x,diag=TRUE)] std.x <- x/sd(x,na.rm=TRUE) if(std) {if(missing(main)) main <- "Histogram of standardized residuals" hist(std.x,main=main,...)} else { if(missing(main)) main <- "Histogram of residuals" hist(x,main=main,...)}}, qq= { if(missing(std)) std <- TRUE x <- x[lower.tri(x,diag=TRUE)] if(std) { if(missing(main)) main <- "Plot of standardized residuals" std.x <- x/sd(x,na.rm=TRUE) xy <- qqnorm(std.x,main=main) qqline(std.x) worst <- order(abs(std.x), decreasing = TRUE) } else { if(missing(main)) main <- "Plot of raw residuals" xy <- qqnorm(x,main=main,...) qqline(x) worst <- order(abs(x), decreasing = TRUE)} worstItems <- arrayInd(worst[1:bad],c(nr,nc)) pos <- rep(4,bad) pos[x[worst[1:bad]]>0] <- 2 text(xy$x[worst[1:bad]],xy$y[worst[1:bad]],paste(rname[worstItems[,2]],rname[worstItems[,1]]),pos=pos,...) }, chi = {#note that xy reported for qqplot is already sorted if(missing(std)) std <- TRUE x <- x[lower.tri(x,diag=TRUE)] if(std) {x <- x/sd(x,na.rm=TRUE) if(missing(main)) main <- "Plot of squared standardized residuals"} else { if(missing(main)) main <- "Plot of squared residuals"} nx <- length(x) - nr xy <- qqplot(qchisq(ppoints(nx),df=1),y=x^2,main=main,ylab="Quantiles of Squared residuals",xlab="Expected value for quantile") qqline(x^2,distribution=function(p) qchisq(p,df=1)) worst <- order(abs(x^2), decreasing = TRUE) worstItems <- arrayInd(worst[1:5],c(nr,nc)) text(xy$x[nx:(nx-4)],xy$y[nx:(nx-4)],paste(rname[worstItems[,2]],rname[worstItems[,1]]),pos=2,...) }, cor= {if(missing(main)) main <- "Plot of residual correlations" cor.plot(x,main=main,numbers=numbers,upper=upper,diag=diag)}) } psych/R/structure.graph.R0000644000176200001440000004033613572013331015070 0ustar liggesusers#Created April 4, 2008 #seriously modified January, 2009 to generate sem code # January 12, 2009 - still can not get latent Xs to line up if correlated # January 19, 2009 Added the ability to draw structures from omega output # More importantly, added the ability to create sem model matrices for the sem package of John Fox # These models will not be completely identified for certain higher order structures and require hand tuning. # January 30, 2009 to allow for hierarchical factor structures "structure.graph" <- function(fx,Phi=NULL,fy=NULL, out.file=NULL,labels=NULL,cut=.3,errors=TRUE,simple=TRUE,regression=FALSE, size=c(8,6), node.font=c("Helvetica",14), edge.font=c("Helvetica", 10), rank.direction=c("RL","TB","LR","BT"), digits=1,title="Structural model", ...){ if (!requireNamespace('Rgraphviz')) {stop("I am sorry, you need to have loaded the Rgraphviz package") #create several dummy functions to get around the "no visible global function definition" problem nodes <- function() {} addEdge <- function() {} subGraph <- function(){} } xmodel <- fx ymodel <- fy if(!is.null(class(xmodel)) && (length(class(xmodel))>1)) { if((inherits(xmodel,"psych") && inherits(xmodel,"omega"))) { Phi <- xmodel$schmid$phi xmodel <- xmodel$schmid$oblique} else { if(inherits(xmodel, "psych") && ((inherits(xmodel,"fa") | (inherits(xmodel,"principal"))))) { if(!is.null(xmodel$Phi)) Phi <- xmodel$Phi xmodel <- as.matrix(xmodel$loadings)} }} else { if(!is.matrix(xmodel) & !is.data.frame(xmodel) &!is.vector(xmodel)) { if(!is.null(xmodel$Phi)) Phi <- xmodel$Phi xmodel <- as.matrix(xmodel$loadings) } else {xmodel <- xmodel} } if(!is.matrix(xmodel) ) {factors <- as.matrix(xmodel)} else {factors <- xmodel} rank.direction <- match.arg(rank.direction) #first some basic setup parameters num.y <- 0 #we assume there is nothing there num.var <- num.xvar <- dim(factors)[1] #how many x variables? if (is.null(num.xvar) ){num.xvar <- length(factors) num.xfactors <- 1} else { num.factors <- num.xfactors <- dim(factors)[2]} if(is.null(labels)) {vars <- xvars <- rownames(xmodel)} else { xvars <- vars <- labels} if(is.null(vars) ) {vars <- xvars <- paste("x",1:num.xvar,sep="") } fact <- colnames(xmodel) if (is.null(fact)) { fact <- paste("X",1:num.xfactors,sep="") } num.yfactors <- 0 if (!is.null(ymodel)) { if(is.list(ymodel) & !is.data.frame(ymodel) ) {ymodel <- as.matrix(ymodel$loadings)} else {ymodel <- ymodel} if(!is.matrix(ymodel) ) {y.factors <- as.matrix(ymodel)} else {y.factors <- ymodel} num.y <- dim(y.factors)[1] if (is.null(num.y)) { num.y <- length(ymodel) num.yfactors <- 1} else { num.yfactors <- dim(y.factors)[2] } yvars <- rownames(ymodel) if(is.null(yvars)) {yvars <- paste("y",1:num.y,sep="") } if(is.null(labels)) {vars <- c(xvars,yvars)} else {yvars <- labels[(num.xvar+1):(num.xvar+num.y)]} yfact <- colnames(ymodel) if(is.null(yfact)) {yfact <- paste("Y",1:num.yfactors,sep="") } fact <- c(fact,yfact) num.var <- num.xvar + num.y num.factors <- num.xfactors + num.yfactors } # sem <- matrix(rep(NA),6*(num.var*num.factors + num.factors),ncol=3) sem <- matrix(NA,nrow=6*(num.var*num.factors + num.factors),ncol=3) colnames(sem) <- c("Path","Parameter","Value") edge.weights <- rep(1,num.var*2*num.factors) #now draw the x part if(!regression) { #the normal condition is to draw a latent model k <- num.factors clust.graph <- new("graphNEL",nodes=c(vars,fact),edgemode="directed") graph.shape <- c(rep("box",num.var),rep("ellipse",num.factors)) #define the shapes if (num.y > 0) {graph.rank <- c(rep("min",num.xvar),rep("max",num.y),rep("same",num.xfactors),rep("",num.yfactors))} else { graph.rank <- c(rep("min",num.var*2),rep("",num.factors))} names(graph.shape) <- nodes(clust.graph) names(graph.rank) <- nodes(clust.graph) edge.label <- rep("",num.var*2*k) #makes too many, but in case of a fully saturated model, this might be necessary edge.name <- rep("",num.var*2*k) names(edge.label) <- seq(1:num.var*2*k) edge.dir <-rep("forward",num.var*2*k) edge.arrows <-rep("open",num.var*2*k) #edge.weights <- rep(1,num.var*2*k) if (num.xfactors ==1) { for (i in 1:num.xvar) { clust.graph <- addEdge(fact[1], vars[i], clust.graph,1) if(is.numeric(factors[i])) {edge.label[i] <- round(factors[i],digits)} else {edge.label[i] <- factors[i]} edge.name[i] <- paste(fact[1],"~",vars[i],sep="") sem[i,1] <- paste(fact[1],"->",vars[i],sep="") if(is.numeric(factors[i])) {sem[i,2] <- vars[i]} else {sem[i,2] <- factors[i] } } k <- num.xvar+1 } else { #end of if num.xfactors ==1 #all loadings > cut in absolute value k <- 1 for (i in 1:num.xvar) { for (f in 1:num.xfactors) { #if (!is.numeric(factors[i,f]) || (abs(factors[i,f]) > cut)) if((!is.numeric(factors[i,f] ) && (factors[i,f] !="0"))|| ((is.numeric(factors[i,f]) && abs(factors[i,f]) > cut ))) { clust.graph <- addEdge(fact[f], vars[i], clust.graph,1) if(is.numeric(factors[i,f])) {edge.label[k] <- round(factors[i,f],digits)} else {edge.label[k] <- factors[i,f]} edge.name[k] <- paste(fact[f],"~",vars[i],sep="") sem[k,1] <- paste(fact[f],"->",vars[i],sep="") if(is.numeric(factors[i,f])) {sem[k,2] <- paste("F",f,vars[i],sep="")} else {sem[k,2] <- factors[i,f]} k <- k+1 } #end of if } } } if(errors) { for (i in 1:num.xvar) { clust.graph <- addEdge(vars[i], vars[i], clust.graph,1) edge.name[k] <- paste(vars[i],"~",vars[i],sep="") edge.arrows[k] <- "closed" sem[k,1] <- paste(vars[i],"<->",vars[i],sep="") sem[k,2] <- paste("x",i,"e",sep="") k <- k+1 } } } else { #the regression case if (title=="Structural model") title <- "Regression model" k <- num.var+1 yvars <- "Y1" clust.graph <- new("graphNEL",nodes=c(vars,yvars),edgemode="directed") graph.rank <- c(rep("min",num.var),rep("",1)) names(graph.rank) <- nodes(clust.graph) graph.shape <- rep("box",k) #define the shapes names(graph.shape) <- nodes(clust.graph) graph.rank <- c(rep("min",num.var),rep("",1)) names(graph.rank) <- nodes(clust.graph) edge.label <- rep("",k) #makes too many, but in case of a fully saturated model, this might be necessary edge.name <- rep("",k) names(edge.label) <- seq(1:k) edge.dir <-rep("back",k) names(edge.dir) <-rep("",k) edge.arrows <-rep("open",k) # names(edge.arrows) <-rep("",k) for (i in 1:num.xvar) { clust.graph <- addEdge(yvars,vars[i], clust.graph,1) if(is.numeric(vars[i])) {edge.label[i] <- round(factors[i],digits)} else {edge.label[i] <- factors[i]} edge.name[i] <- paste(yvars,"~",vars[i],sep="") } } #now, if there is a ymodel, do it for y model if(!is.null(ymodel)) { if (num.yfactors ==1) { for (i in 1:num.y) { clust.graph <- addEdge( yvars[i],fact[1+num.xfactors], clust.graph,1) if(is.numeric(y.factors[i] ) ) {edge.label[k] <- round(y.factors[i],digits) } else {edge.label[k] <- y.factors[i]} edge.name[k] <- paste(yvars[i],"~",fact[1+num.xfactors],sep="") edge.dir[k] <- paste("back") sem[k,1] <- paste(fact[1+num.xfactors],"->",yvars[i],sep="") if(is.numeric(y.factors[i] ) ) {sem[k,2] <- paste("Fy",yvars[i],sep="")} else {sem[k,2] <- y.factors[i]} k <- k +1 } } else { #end of if num.yfactors ==1 #all loadings > cut in absolute value for (i in 1:num.y) { for (f in 1:num.yfactors) { if((!is.numeric(y.factors[i,f] ) && (y.factors[i,f] !="0"))|| ((is.numeric(y.factors[i,f]) && abs(y.factors[i,f]) > cut ))) {clust.graph <- addEdge( vars[i+num.xvar],fact[f+num.xfactors], clust.graph,1) if(is.numeric(y.factors[i,f])) {edge.label[k] <- round(y.factors[i,f],digits)} else {edge.label[k] <-y.factors[i,f]} edge.name[k] <- paste(vars[i+num.xvar],"~",fact[f+num.xfactors],sep="") edge.dir[k] <- paste("back") sem[k,1] <- paste(fact[f+num.xfactors],"->",vars[i+num.xvar],sep="") if(is.numeric(y.factors[i,f])) { sem[k,2] <- paste("Fy",f,vars[i+num.xvar],sep="")} else {sem[k,2] <- y.factors[i,f]} k <- k+1 } #end of if } #end of factor } # end of variable loop } if(errors) { for (i in 1:num.y) { clust.graph <- addEdge(vars[i+num.xvar], vars[i+num.xvar], clust.graph,1) edge.name[k] <- paste(vars[i+num.xvar],"~",vars[i+num.xvar],sep="") edge.dir[k] <- paste("back") edge.arrows[k] <- "closed" sem[k,1] <- paste(vars[i+num.xvar],"<->",vars[i+num.xvar],sep="") sem[k,2] <- paste("y",i,"e",sep="") k <- k+1 }} } #end of if.null(ymodel) nAttrs <- list() #node attributes eAttrs <- list() #edge attributes if (!is.null(labels)) {var.labels <- c(labels,fact) names(var.labels) <- nodes(clust.graph) nAttrs$label <- var.labels names(edge.label) <- edge.name } if(!regression) { if(!is.null(Phi)) {if (!is.matrix(Phi)) { if(!is.null(fy)) {Phi <- matrix(c(1,0,Phi,1),ncol=2)} else {Phi <- matrix(c(1,Phi,Phi,1),ncol=2)}} if(num.xfactors>1) {for (i in 2:num.xfactors) { #first do the correlations within the f set for (j in 1:(i-1)) {if((!is.numeric(Phi[i,j] ) && ((Phi[i,j] !="0")||(Phi[j,i] !="0")))|| ((is.numeric(Phi[i,j]) && abs(Phi[i,j]) > cut ))) { clust.graph <- addEdge( fact[i],fact[j],clust.graph,1) if (is.numeric(Phi[i,j])) { edge.label[k] <- round(Phi[i,j],digits)} else {edge.label[k] <- Phi[i,j]} edge.name[k] <- paste(fact[i],"~",fact[j],sep="") if(!is.numeric(Phi[i,j] )) {if(Phi[i,j] == Phi[j,i] ) { edge.dir[k] <- "both" sem[k,1] <- paste(fact[i],"<->",fact[j],sep="") sem[k,2] <- paste("rF",i,"F",j,sep="")} else { if(Phi[i,j] !="0") { edge.dir[k] <- "forward" sem[k,1] <- paste(fact[i]," ->",fact[j],sep="") sem[k,2] <- paste("rF",i,"F",j,sep="")} else { edge.dir[k] <- "back" sem[k,1] <- paste(fact[i],"<-",fact[j],sep="") sem[k,2] <- paste("rF",i,"F",j,sep="")} } } else { #is.numeric sem[k,1] <- paste(fact[i],"<->",fact[j],sep="") edge.dir[k] <- "both" if (is.numeric(Phi[i,j])) {sem[k,2] <- paste("rF",i,"F",j,sep="")} else {sem[k,2] <- Phi[i,j] } } edge.weights[k] <- 1 k <- k + 1} } } } #end of correlations within the fx set if(!is.null(ymodel)) { for (i in 1:num.xfactors) { for (j in 1:num.yfactors) { if((!is.numeric(Phi[j+num.xfactors,i] ) && (Phi[j+num.xfactors,i] !="0"))|| ((is.numeric(Phi[j+num.xfactors,i]) && abs(Phi[j+num.xfactors,i]) > cut ))) { clust.graph <- addEdge( fact[j+num.xfactors],fact[i],clust.graph,1) if (is.numeric(Phi[j+num.xfactors,i])) { edge.label[k] <- round(Phi[j+num.xfactors,i],digits)} else {edge.label[k] <- Phi[j+num.xfactors,i]} edge.name[k] <- paste(fact[j+num.xfactors],"~",fact[i],sep="") if(Phi[j+num.xfactors,i]!=Phi[i,j+num.xfactors]) {edge.dir[k] <- "back" sem[k,1] <- paste(fact[i],"->",fact[j+num.xfactors],sep="") } else { edge.dir[k] <- "both" sem[k,1] <- paste(fact[i],"<->",fact[j+num.xfactors],sep="")} if (is.numeric(Phi[j+num.xfactors,i])) {sem[k,2] <- paste("rX",i,"Y",j,sep="")} else {sem[k,2] <- Phi[j+num.xfactors,i] } k <- k + 1 } } } } } } else {if(!is.null(Phi)) {if (!is.matrix(Phi)) Phi <- matrix(c(1,Phi,0,1),ncol=2) for (i in 2:num.xvar) { for (j in 1:(i-1)) { clust.graph <- addEdge( vars[i],vars[j],clust.graph,1) if (is.numeric(Phi[i,j])) { edge.label[k] <- round(Phi[i,j],digits)} else {edge.label[k] <- Phi[i,j]} edge.name[k] <- paste(vars[i],"~",vars[j],sep="") if(Phi[i,j] != Phi[j,i]){edge.dir[k] <- "back"} else {edge.dir[k] <- "both"} k <- k + 1 }} } edge.arrows <- rep("open",k) } for(f in 1:num.factors) { sem[k,1] <- paste(fact[f],"<->",fact[f],sep="") sem[k,3] <- "1" k <- k+1 } obs.xvar <- subGraph(vars[1:num.xvar],clust.graph) if (!is.null(ymodel)) {obs.yvar <- subGraph(vars[(num.xvar+1):num.var],clust.graph)} obs.var <- subGraph(vars,clust.graph) if(!regression) {cluster.vars <- subGraph(fact,clust.graph) } else {cluster.vars <- subGraph(yvars,clust.graph) } observed <- list(list(graph=obs.xvar,cluster=TRUE,attrs=c(rank="min"))) names(edge.label) <- edge.name names(edge.dir) <- edge.name names(edge.arrows) <- edge.name names(edge.weights) <- edge.name nAttrs$shape <- graph.shape nAttrs$rank <- graph.rank eAttrs$label <- edge.label eAttrs$dir<- edge.dir eAttrs$arrowhead <- edge.arrows eAttrs$arrowtail<- edge.arrows eAttrs$weight <- edge.weights attrs <- list(node = list(shape = "ellipse", fixedsize = FALSE),graph=list(rankdir=rank.direction, fontsize=6,bgcolor="white" )) if (!is.null(ymodel)) {observed <- list(list(graph=obs.xvar,cluster=TRUE,attrs=c(rank="sink")),list(graph=obs.yvar,cluster=TRUE,attrs=c(rank="source"))) } else { observed <- list(list(graph=obs.xvar,cluster=TRUE,attrs=c(rank="max")))} plot(clust.graph, nodeAttrs = nAttrs, edgeAttrs = eAttrs, attrs = attrs,subGList=observed,main=title) if(!is.null(out.file) ){toDotty(clust.graph,out.file,nodeAttrs = nAttrs, edgeAttrs = eAttrs, attrs = attrs) } #return(list(nodeAttrs = nAttrs, edgeAttrs = eAttrs, attrs = attrs)) #useful for debugging model=sem[1:(k-1),] class(model) <- "mod" #suggested by John Fox to make the output cleaner invisible(model) } psych/R/predict.principal.R0000644000176200001440000001121414534376061015346 0ustar liggesusers#written November 22, 2010 #revised April 7, 2012 to treat single case problem #revised October 30, 2019 to include bestScales options "predict.psych" <- function(object,data,old.data,options=NULL,missing=FALSE,impute="none",...) { obnames <- cs(fa,bestScales,setCor,pca, lmCor,principal ) value <- inherits(object, obnames, which=TRUE) if (any(value > 1)) {value <- obnames[which(value >0)]} else {value <- "none"} if(value %in% cs(factor,pca,principal,omega)) value <- "fa" switch(value, fa = { data <- as.matrix(data) if(ncol(data) ==1) data <- t(data) if(missing(old.data)) {data <- scale(data)} else { stats <- describe(old.data) data <- scale(data,center=stats$mean,scale=stats$sd)} wt <- object$weights if(impute !="none") data <- impute.na(data,impute) if(missing) {pred <- matrixMult.na(data,wt)} else { pred <- data %*% wt} }, bestScales = { if(!is.null(options)) {keys<- options} else {keys <- "best.keys"} if(impute != "none") { #for speed we want to just impute those items that will be scored select <- selectFromKeys(keys) data <- impute.na(data,impute)} switch(keys, best.keys = {keys <- object$best.keys scores <- scoreVeryFast(keys,data)}, weights = {keys <- object$weights scores <- scoreWtd(keys,data)}, optimal.keys ={ keys <- object$optimal.keys scores <- scoreVeryFast(keys,data)}, optimal.weights ={ keys <- object$optimal.weights scores <- scoreWtd(keys,data)} ) criteria <- data[names(keys)] bwt <- object$final.stats$r * object$final.stats$crit.sd/ object$final.stats$sd xmean <- object$final.stats$mean ymean <- object$final.stats$crit.mean pred <- t(bwt *(t(scores) - xmean) + ymean ) }, #added January 5, 2020 #revised December 7 , 2023 to allow for prediction of unstandardsized scores lmCor = { data <- as.matrix(data) if(ncol(data) ==1) data <- t(data) vars <- rownames(object$coefficients) old.vars <- vars <- vars[ vars %in% colnames(data)] data <- data[,vars,drop=FALSE] if(object$std)data <- scale(data) unity <- rep(1,NROW(data)) data <- cbind(unity,data) colnames(data)[1]<- rownames(object$coefficients)[1] vars <- rownames(data) if(!object$std) { wt <- object$coefficients} else { if(!missing(old.data)) { old.data <- old.data[,old.vars] old.data <- cbind(unity,old.data) colnames(old.data)[1]<- rownames(object$coefficients)[1] stats <- describe(old.data) data <- scale(data,center=stats$mean,scale=stats$sd) data[,1] <- unity } wt <- object$coefficients} if(impute !="none") data <- impute.na(data,impute) if(missing) {pred <- matrixMult.na(data,wt,scale=FALSE)} else { pred <- data %*% wt} } ) return(pred)} #these next two do not standardize the prediction "predict_principal" <- function(object,data) { wt <- object$weights data <- as.matrix(data) pred <- data %*% wt return(pred) } "predict_fa" <- function(object,data) { wt <- object$weights data <- as.matrix(data) pred <- data %*% wt return(pred) } predict_lmCor.best <- function(object,data,p=.01){ data <- as.matrix(data) vars <- rownames(object$coefficients) vars <- vars[ vars %in% colnames(data)] data <- data[,vars,drop=FALSE] wt <- object$coefficients prob <- object$Probability wt <- wt* (prob < p) wt <- wt[vars,] pred <- data %*% wt } #do matrix multiplication with missing data matrixMult.na <- function(x,y,scale=TRUE) { nvar <- ncol(x) if(nvar != nrow(y) ) stop("matrices are not compatible")#matrices are not compatible if(scale) {x <- scale(x)} #zero center and standaridize #if(scale) {y <- scale(y)} #zero center and standaridize tx <- t(x) #we want to do it on the transposed matrix ny <- ncol(y) result <- matrix(NA,nrow = nrow(x),ncol= ncol(y)) result <- apply(y,2,function(x ) colSums(x * tx,na.rm=TRUE)) #changed to sums rather than means 06/16/21 return((result)) } "impute.na" <- function(x,impute="mean") { miss <- which(is.na(x),arr.ind=TRUE) if(impute=="mean") { item.means <- colMeans(x,na.rm=TRUE) #replace missing values with means x[miss]<- item.means[miss[,2]]} else { item.med <- apply(x,2,median,na.rm=TRUE) #replace missing with medians x[miss]<- item.med[miss[,2]]} return(x) } cor_na <- function(x,y=NULL,scale=TRUE) { nvar <- ncol(x) if(scale) {sx <- scale(x)} else {sx <- x} if(is.null(y)) y <- sx if(nvar != ncol(y) ) stop("matrices are not compatible")#matrices are not compatible #if(scale) x <- scale(x) #zero center and standaridize #we want to do it on the transposed matrix ny <- ncol(y) result <- matrix(NA,nrow = ncol(x),ncol= ncol(y)) result <- apply(y,2,function(x ) colMeans(x * sx,na.rm=TRUE)) return((result)) }psych/R/geometric.mean.R0000644000176200001440000000022513733445560014632 0ustar liggesusers"geometric.mean" <- function(x,na.rm=TRUE){ if (is.null(nrow(x))) {exp(mean(log(x),na.rm=na.rm)) } else { exp(apply(log(x),2,mean,na.rm=na.rm))} } psych/R/sim.dichot.R0000644000176200001440000000177411127464626014007 0ustar liggesusers"sim.dichot" <- function (nvar = 72, nsub = 500, circum = FALSE, xloading = 0.6, yloading = 0.6, gloading = 0, xbias = 0, ybias = 0, low = 0, high = 0) { avloading <- (xloading + yloading)/2 errorweight <- sqrt(1 - (avloading^2 + gloading^2)) g <- rnorm(nsub) truex <- rnorm(nsub) * xloading + xbias truey <- rnorm(nsub) * yloading + ybias if (circum) { radia <- seq(0, 2 * pi, len = nvar + 1) rad <- radia[which(radia < 2 * pi)] } else rad <- c(rep(0, nvar/4), rep(pi/2, nvar/4), rep(pi, nvar/4), rep(3 * pi/2, nvar/4)) error <- matrix(rnorm(nsub * (nvar)), nsub) trueitem <- outer(truex, cos(rad)) + outer(truey, sin(rad)) item <- gloading * g + trueitem + errorweight * error nvar2 <- nvar/2 iteml <- (item[,(1:nvar2)*2 -1] >= low) itemh <- (item[,(1:nvar2)*2] >= high) item <- cbind(iteml,itemh)+0 return(item) } #revised October 2 to make difficulty and direction of factor loading unconfoundedpsych/R/esem.R0000644000176200001440000002641014536624663012677 0ustar liggesusers#created Sept 4, 2017 to try to do exploratory sem by factoring sets 1 and 2 #and then linking the two sets #slightly improved December 15, 2018 to label the factors better #added cancorDiagram which is functionally just a call to esemDiagram "esem" <- function(r,varsX,varsY,nfX=1,nfY=1,n.obs=NULL,fm="minres",rotate="oblimin", rotateY="oblimin",plot=TRUE, cor="cor", use="pairwise",weight=NULL,...) { if(is.null(colnames(r))) colnames(r) <- rownames(r) <- paste0("V",1:ncol(r)) #vars <- order(c(vars1,vars2)) cl <- match.call() if(is.numeric(varsX)) varsX <- colnames(r)[varsX] if(is.numeric(varsY)) varsY <- colnames(r)[varsY] vars <- c(varsX,varsY) if(is.null(n.obs)) n.obs <- NA if(!isCorrelation(r)) {#find the correlations n.obs <- nrow(r) r <- r[,vars] #we organize the data to be just the ones we want, that is, ignore variables not included in the model switch(cor, cor = {r <- cor(r,use=use)}, cov = {r <- cov(r,use=use) covar <- TRUE}, wtd = { r <- cor.wt(r,w=weight)$r}, tet = {r <- tetrachoric(r)$rho}, poly = {r <- polychoric(r)$rho}, mixed = {r <- mixedCor(r,use=use)$rho}, Yuleb = {r <- YuleCor(r,,bonett=TRUE)$rho}, YuleQ = {r <- YuleCor(r,1)$rho}, YuleY = {r <- YuleCor(r,.5)$rho } ) } #varnames <- colnames(r)[vars] varnames <- vars rownames(r) <- colnames(r) R <- r[varnames,varnames] #This reorganizes R so that it is the order of the selected variables nX <- length(varsX) nY <-length(varsY) df1 <- nX *(nX-1)/2 - nfX * nX + nfX * (nfX-1)/2 df2 <- nY *( nY-1)/2 - nfY * nY + nfY * (nfY-1)/2 f1 <- fa.extend(R,nfX,ov=varsX,ev=varsY,fm=fm,rotate=rotate,...) loads1 <- f1$loadings[varnames,,drop=FALSE] S1 <- f1$Structure[varnames,,drop=FALSE] if(!is.null(ncol(S1))) colnames(loads1) <- colnames(S1) <- paste0("X",1:ncol(loads1)) Phi1 <- f1$Phi f2 <- fa.extend(R,nfY,ov=varsY,ev=varsX,fm=fm,rotate=rotateY,...) loads2 <- f2$loadings[varnames,,drop=FALSE] S2 <- f2$Structure[varnames,,drop=FALSE] if(!is.null(ncol(S2))) {colnames(loads2) <- colnames(S2) <- paste0("Y",1:ncol(loads2))} Phi2 <- f2$Phi f12 <- cbind(loads1,loads2) S12 <- cbind(S1,S2) S12 <- as.matrix(S12) Phi <- t(S12) %*% solve(R) %*% S12 loadsX <- f1$loadings[varsX,,drop=FALSE] loadsY <- f2$loadings[varsY,,drop=FALSE] colnames(loadsX) <- paste0("X",1:ncol(loadsX)) colnames(loadsY) <- paste0("Y",1:ncol(loadsY)) # loadsX <- f1$loadings[colnames(R)[varsX],,drop=FALSE] # loadsY <- f2$loadings[colnames(R)[varsY],,drop=FALSE] diag(Phi) <- 1 #now, a kludge to make it better -- but not actually, so dropped if(FALSE){if(!is.null(Phi1)) Phi[1:nfX,1:nfX] <- Phi1 if(!is.null(Phi2)) Phi[(nfX+1):(nfX+nfY),(nfX+1):(nfX+nfY)] <- Phi2 } result <- esem.stats(R,f12,S12,Phi,n.obs=n.obs) result$n.obs <- n.obs result$loadings <- f12 result$Structure <- S12 result$loadsX <- loadsX result$loadsY <- loadsY result$PhiX <- Phi1 result$PhiY <- Phi2 result$esem.dof <- df1 + df2 result$df1 <- df1 result$df2 <- df2 result$fm <- fm result$fx <- f1$fo result$fy <- f2$fo result$Phi <- Phi result$Call <- cl class(result) <- c("psych","esem") if(plot) esem.diagram(result) return(result)} "esem.stats" <- function(r,f,s,phi,n.obs=NA) { r <- as.matrix(r) n <- ncol(r) nfactors <- ncol(f) if(is.null(nfactors)) nfactors <- 1 Sp <- s %*% solve(phi) #this is not quite f, but is better model <- Sp %*% t(s) #this works better than model <- f %*% t(s) residual <- r - model result <- list() result$communality <- diag(model) result$sumsq <- diag(t(s) %*% (Sp)) r2 <- sum(r*r) rstar2 <- sum(residual*residual) #Alternatively, we can recognize that we are not estimating all of these #this is results$esem.dof result$dof <- dof <- n * (n-1)/2 - n * nfactors + (nfactors *(nfactors-1)/2) r2.off <- r2 - tr(r) diag(residual) <- 0 rstar.off <- sum(residual^2) # # m.inv.r <- try(solve(model,r),silent=TRUE) #modified Oct 30, 2009 to perhaps increase precision -- #modified 2015/1/2 to use try # # if(inherits(m.inv.r,"try-error") {warning("the model inverse times the r matrix is singular, replaced with Identity matrix which means fits are wrong") # sum} # if(is.na(n.obs)) {result$n.obs=NA # result$PVAL=NA} else {result$n.obs=n.obs} m.inv.r <- diag(1,n,n) #this is because the m.inv.r is not estimated # result$dof <- n * (n-1)/2 - n * nfactors + (nfactors *(nfactors-1)/2) result$objective <- sum(diag((m.inv.r))) - log(det(m.inv.r)) -n #this is what Tucker Lewis call F result$objective <- rstar2 #because the normal way doesn't work # if(is.infinite(result$objective)) {result$objective <- rstar2 # message("The determinant of the smoothed correlation was zero.\nThis means the objective function is not defined.\nChi square is based upon observed residuals.")} result$criteria <- c("objective"=result$objective,NA,NA) if (!is.na(n.obs)) {result$STATISTIC <- chisq <- result$objective * ((n.obs-1) -(2 * n + 5)/6 -(2*nfactors)/3) #from Tucker and from factanal # if (!is.na(n.obs)) {result$STATISTIC <- chisq <- result$objective * ((n.obs-1)) #from Fox and sem if(!is.nan(result$STATISTIC)) if (result$STATISTIC <0) {result$STATISTIC <- 0} if (result$dof > 0) {result$PVAL <- pchisq(result$STATISTIC, result$dof, lower.tail = FALSE)} else {result$PVAL <- NA} } result$ENull <- r2.off * n.obs #the empirical null model result$null.dof <- n * (n-1) result$chi <- rstar.off * n.obs #this is the empirical chi square result$rms <- sqrt(rstar.off/(n*(n-1))) #this is the empirical rmsea result$nh <- n.obs if (result$dof > 0) {result$EPVAL <- pchisq(result$chi, result$dof, lower.tail = FALSE) result$crms <- sqrt(rstar.off/(2*result$dof) ) result$EBIC <- result$chi - result$dof * log(n.obs) result$ESABIC <- result$chi - result$dof * log((n.obs+2)/24) result$RMSEA <- RMSEA(result$chi,result$dof,n.obs) } else {result$EPVAL <- NA result$crms <- NA result$EBIC <- NA result$ESABIC <- NA} result$fit <-1-rstar2/r2 result$fit.off <- 1-rstar.off/r2.off result$sd <- sd(as.vector(residual)) #this is the non-sample size weighted root mean square residual result$factors <- nfactors result$complexity <- (apply(f,1,function(x) sum(x^2)))^2/apply(f,1,function(x)sum(x^4)) result$residual <- residual diag(model) <- diag(r) return(result) } "print_psych.esem" <-function(x,digits=2,short=TRUE,cut=NULL, suppress.warnings=TRUE,...) { cat("Exploratory Structural Equation Modeling Analysis using method = ",x$fm ) cat("\nCall: ") print(x$Call) nitems <- nrow(x$loadings) nfactors <- ncol(x$loadings) cat("\nFor the 'X' set:\n") x$loadsX <- as.matrix(x$loadsX) print(round(x$loadsX,digits=digits)) if(!short) { if(!is.null(ncol(x$PhiX))){ cat("\nWith factor intercorrelations of \n") print(round(x$PhiX,digits=digits)) } } cat("\nFor the 'Y' set:\n") x$loadsY <- as.matrix(x$loadsY) print(round(x$loadsY,digits=digits)) if(!short) { if(!is.null(ncol(x$PhiY))) { cat("\nWith factor intercorrelations of \n") print(round(x$PhiY,digits=digits)) } } if(!short) { cat('\nStandardized pattern coefficients on the X and Y sets using Factor Extension\n') L <- cbind(x$loadings,x$communality,1-x$communality) colnames(L)[(ncol(L)-1):ncol(L)] <- c("h2","u2") # print(round(x$loadings,digits=digits),round(x$communality,digits=digits)) print(round(L,digits=digits)) varex <- rbind("SS loadings" = x$sumsq) varex <- rbind(varex, "Proportion Var" = x$sumsq/nitems) varex <- rbind(varex, "Cumulative Var"= cumsum( x$sumsq/nitems)) varex <- rbind(varex, "Cum. factor Var"= cumsum( x$sumsq/sum( x$sumsq))) cat("\n") print(round(varex, digits)) } # if(!is.null(x$complexity)) cat("\nMean item complexity = ",round(mean(x$complexity),1)) # objective <- x$criteria[1] # cat("\nCorrelations between the X and Y sets.\n") print(round(x$Phi,digits=digits)) if(!is.null(x$null.dof)) {cat("\nThe degrees of freedom for the null model are ",x$null.dof, " and the empirical chi square function was ",round(x$ENull,digits),...)} cat("\nThe degrees of freedom for the model are",x$dof," and the empirical chi square function was ",round(x$chi,digits),"\n"," with prob < ", signif(x$EPVAL,digits),"\n" ,...) if(!is.null(x$rms)) {cat("\nThe root mean square of the residuals (RMSR) is ", round(x$rms,digits),"\n") } if(!is.null(x$crms)) {cat("The corrected root mean square of the residuals is ", round(x$crms,digits),"\n",...) } if((!is.null(x$chi)) && (!is.na(x$chi))) {cat(" with the empirical chi square ", round(x$chi,digits), " with prob < ", signif(x$EPVAL,digits),"\n" ,...) } if(!is.na(x$n.obs)) {cat("The total number of observations was ",x$n.obs, " with fitted Chi Square = ",round(x$STATISTIC,digits), " with prob < ", signif(x$PVAL,digits),"\n",...)} if(!is.null(x$TLI)) cat("\nTucker Lewis Index of factoring reliability = ",round(x$TLI,digits+1)) if(!is.null(x$RMSEA)) {cat("\nRMSEA index = ",round(x$RMSEA[[2]],digits+1), " and the", (1- x$RMSEA[[4]])*100,"% confidence intervals are ",round(x$RMSEA[[1]],digits+1),round(x$RMSEA[[3]],digits+1),...) } if(!is.null(x$EBIC)) {cat("\nEmpirical BIC = ",round(x$EBIC,digits))} if(!is.null(x$ESABIC)) {cat("\nESABIC = ",round(x$ESABIC,digits))} if(!is.null(x$fit)) cat("\nFit based upon off diagonal values =", round(x$fit.off,digits)) if(short) cat("\nTo see the item loadings for the X and Y sets combined, and the associated fa output, print with short=FALSE.\n") # cat("\nTo report the factor analysis of the X and Y sets with their associated statistics, run fa on the X and Y sets separately.") } "interbattery" <- function(r, varsX, varsY, nfX = 1, nfY = 1, n.obs = NULL,cor = "cor", use = "pairwise",weight=NULL) { cl <- match.call() vars <- c(varsX,varsY) if(is.null(n.obs)) n.obs <- NA if(ncol(r) < nrow(r)) {#find the correlations n.obs <- nrow(r) switch(cor, cor = {r <- cor(r,use=use)}, cov = {r <- cov(r,use=use) covar <- TRUE}, tet = {r <- tetrachoric(r)$rho}, poly = {r <- polychoric(r)$rho}, mixed = {r <- mixedCor(r,use=use)$rho}, Yuleb = {r <- YuleCor(r,,bonett=TRUE)$rho}, YuleQ = {r <- YuleCor(r,1)$rho}, YuleY = {r <- YuleCor(r,.5)$rho } ) } varnames <- colnames(r)[vars] R <- r[varnames,varnames] #This reorganizes R so that it is the order of the selected r12 <- r[varsX,varsY] H1 <- r12 %*% t(r12) E1 <- eigen(H1) W1 <- E1$vectors[,1:nfX,drop=FALSE] gamma1 <- sqrt(E1$values[1:nfX,drop=FALSE]) A1 <- W1 %*% diag(sqrt(gamma1),ncol=nfX) W2 <- t(r12) %*% W1 %*% diag(1/gamma1,ncol=nfX) A2 <- W2 %*% diag(sqrt(gamma1)) As <- colSums(sign(A1)) flip <- diag(sign(As),ncol=nfX) A1 <- A1 %*% flip As <- colSums(sign(A2)) flip <- diag(sign(As),ncol=nfX) A2 <- A2 %*% flip colnames(A1) <- colnames(A2) <- paste0("IB",1:ncol(A1)) rownames(A1) <- rownames(r12) return(list(A1=A1,A2 = A2,loadings=rbind(A1,A2),Call=cl)) } psych/R/glbs.R0000644000176200001440000000230314500345724012655 0ustar liggesusers"glb.fa" <- function(r,key= NULL){ cl <- match.call() #for eventual fancy printing with the call listed nvar <- dim(r)[2] #find a correlation matrix if using a data matrix if(dim(r)[1] != dim(r)[2]) {r <- cor(r,use="pairwise")} else { if(!is.matrix(r)) r <- as.matrix(r) r <- cov2cor(r)} #make sure it is a correlation matrix not a covariance or data matrix if(is.null(colnames(r))) { rownames(r) <- colnames(r) <- paste("V",1:nvar,sep="") } if (!is.null(key)) { key <- as.vector(key) r <- diag(key) %*% r %*% diag(key) flip <- FALSE #we do this if we specify the key } else {key <- rep(1,nvar) } nv <- dim(r)[1] #how many variables f1 <- fa(r) #factor it #first find the eigen values of the factor model nf <-length(which(f1$values > 0)) #how many are real df <- nv * (nv-1)/2 - nf*nv + nf*(nf-1)/2 #check for degrees of freedom if (df < 0 ) nf <- nf-1 fn <- fa(r,nf,rotate="none") rr <- r diag(rr) <- fn$communality #based upon nf factors glb <- sum(rr)/sum(r) return(list(glb=glb,communality = fn$communality,numf = nf,Call=cl)) } psych/R/statsBy.r0000644000176200001440000003231014550627143013423 0ustar liggesusersc#developed July 4, 2012 #modified July 9, 2014 to allow polychorics within groups #modified June 2, 2015 to include covariance, pearson, spearman, poly ,etc. in correlations #Fixed March 3, 2017 to not weight empty cells in finding ICCs #some ideas taken from Bliese multilevel package (specifically, the WABA results) #modifed November 3, 2018 to allow a single DV (in case cohen.d is just comparing two groups on one DV) #corrected January 1, 2019 to allow for grouping variables that are characters #March, 2020 Added the ability to weight groups with external weights #Jan 2022, Added the nWg object to count cell sizes within groups #Dec 2023 Added the within group slope object as output #Dec 2023 Added the ability to specify the min.n (minimum group size) #Jan 2024 Added the check for being a tibble "statsBy" <- function (data, group, cors=FALSE, cor="cor", method="pearson", use="pairwise", poly=FALSE, na.rm=TRUE, alpha=.05, minlength=5, weights=NULL, min.n= 1) { # min size added 12/29/23 cl <- match.call() valid <- function(x) { #count the number of valid cases sum(!is.na(x)) } valid1 <- function(x) { #count the number of valid cases sum(!is.na(x[,1,drop=FALSE] ) ) } #define a function to count pairwise observations pairwise <- function(x) {n <- t(!is.na(x)) %*% (!is.na(x)) n} #get the grouping information if(inherits(data[1], "data.frame",which=TRUE) >1) data <- fix.dplyr(data) #to get around a problem created by dplyr (being a tibble) if(length(group) < NROW(data) ){ #added 01/01/19 to handle the case of non-numeric grouping data if(is.character(group) ) { gr <- which(colnames(data) %in% group) } else {gr <- group} } else {data <- cbind(data,group) group <- "group" gr <- which(colnames(data) %in% group)} z1 <- data[,group] z <- z1 cnames <- colnames(data) for (i in 1:ncol(data)) {if(is.factor(data[,i]) || is.logical(data[,i]) ) { data[,i] <- as.numeric(data[,i])} if (is.character(data[,i])) data[,i] <- as.numeric(as.factor(data[,i])) # colnames(data)[i] <- paste(cnames[i],"*",sep="") } xvals <- list() #find the statistics by group tempcount<- by(data,z,valid1) #this identifies the number of cases per z if(NCOL(tempcount)==1) {tempz <- tempcount[tempcount >= min.n] #but works only for one criteria tt <- z1 %in% names(tempz) data <- data[tt,] z <- z[tt]} temp <- by(data,z,colMeans,na.rm=na.rm) rowname <- dimnames(temp)[[1]] if(length(dimnames(temp))> 1){ #if we have multiple criteria, we need to name them for (i in 2:length(dimnames(temp))) { rowname <- paste0(rep(rowname,each=length(dimnames(temp)[[i]])),"-",dimnames(temp)[[i]])} } rownn <- lapply(temp,is.null) #drop rows without values if(sum(as.integer(rownn)) > 0) { rowname <- rowname[-which(rownn==TRUE)] } # look for missing criteria xvals$mean <- t(matrix(unlist(temp),nrow=ncol(data))) xvals$sd <-t(matrix(unlist(by(data,z,function(x) sapply(x,sd,na.rm=na.rm))),nrow=ncol(data))) xvals$n <- t(matrix(unlist(by(data,z,function(x) sapply(x,valid))),nrow=ncol(data))) colnames(xvals$mean) <- colnames(xvals$sd) <- colnames(xvals$n) <- colnames(data) rownames(xvals$mean) <- rownames(xvals$sd) <- rownames(xvals$n) <- rowname # nH <- harmonic.mean(xvals$n) #this will be 0 if any is zero -- but is not used anyway nG <- colSums(!is.na(xvals$mean)) #fixed this so it is just for the cells that are not NA GM <- colSums(xvals$mean*xvals$n,na.rm=na.rm)/colSums(xvals$n,na.rm=na.rm) MSb <- colSums(xvals$n*t((t(xvals$mean) - GM)^2),na.rm=na.rm)/(nG-1) #weight means by n MSw <- colSums(xvals$sd^2*(xvals$n-1*(xvals$n>0)),na.rm=na.rm)/(colSums(xvals$n-1*(xvals$n>0)))#find the pooled sd #fix this for 0 cell size xvals$F <- MSb/MSw N <- colSums(xvals$n) #overall N for each variable npr <- (colSums(xvals$n-1*(xvals$n > 0))+colSums(xvals$n >0))/(colSums(xvals$n >0)) xvals$ICC1 <- (MSb-MSw)/(MSb + MSw*(npr-1)) xvals$ICC2 <- (MSb-MSw)/(MSb) #now, figure out the cis for the ICCs #taken from the ICC function F11 <- MSb/MSw #df11n <- n.obs-1 #df11d <- n.obs*(nj-1) df11n <- nG - 1 df11d <- nG* (npr-1) # p11 <- 1-pf(F11,df11n,df11d) p11 <- -expm1(pf(F11,df11n,df11d,log.p=TRUE)) #F21 <- MSB/MSE df21n <- N - 1 df21d <- N * (npr-1) # p21 <- 1-pf(F21,df21n,df21d) # F31 <- F21 F1L <- F11 / qf(1-alpha/2,df11n,df11d) F1U <- F11 * qf(1-alpha/2,df11d,df11n) L1 <- (F1L-1)/(F1L+(npr-1)) U1 <- (F1U -1)/(F1U+(npr-1)) L2 <- 1-1/F1L U2 <- 1 -1/F1U xvals$ci1 <-as.matrix(data.frame(L1 = L1,U1 = U1) ) xvals$ci2 <- as.matrix(data.frame(L2 = L2,U2 = U2) ) nWg<- NULL #if we don't find within group correlations, make this NULL #if we want within group correlations, then find them # if(cors) {if(!poly) { r <- by(data,z,function(x) cor(x[-gr],use="pairwise",method=method)) } else { r <- by(data,z,function(x) polychoric(x[-gr])$rho)} #added 02/06/15 if(cors) {if (poly) {cor <- "poly"} switch(cor, cor = {r <- by(data,z,function(x) cor(x[-gr],use=use,method=method))}, cov = {r <- by(data,z,function(x) cov(x[-gr],use=use)) covar <- TRUE}, tet = {r <- by(data,z,function(x) tetrachoric(x[-gr])$rho)}, poly = {r <- by(data,z,function(x) polychoric(x[-gr])$rho)}, mixed = {r <- by(data,z,function(x) mixedCor(x[-gr])$rho)} ) nWg <- by(data,z,function(x) pairwise(x[-gr])) #drop the group nvars <- ncol(r[[1]]) xvals$r <- r #store them as square matrices length.r <- length(r) # xvals$r.ci <- r xvals$r.ci <- mapply(cor.Ci, r=r, n=nWg) attributes(xvals$r.ci) <- attributes(r) lower <- lapply(r,function(x) if(!is.null(x)){ x[lower.tri(x)]}) xvals$within <- t(matrix(unlist(lower),nrow=nvars*(nvars-1)/2)) #string them out as well cnR <- abbreviate(cnames[-gr],minlength=minlength) k <- 1 colnames(xvals$within) <- paste("V",1:ncol(xvals$within)) for(i in 1:(nvars-1)) {for (j in (i+1):nvars) { colnames(xvals$within)[k] <- paste(cnR[i],cnR[j],sep="-") k<- k +1 }} # rownames(xvals$within) <- paste0("z",names(xvals$r)) rownames(xvals$within) <- rowname if(is.null(weights)) { wt <- by(data,z,function(x) pairwiseCount(x[-gr])) #the normal way is to weight by sample size lower.wt <- t(matrix(unlist(lapply(wt,function(x) if(!is.null(x)) { x[lower.tri(x)]}) ) ,nrow=nvars*(nvars-1)/2)) } else { # wt <- matrix(rep(weights,nvars),ncol=nvars) lower.wt <- matrix(rep(weights, nvars * (nvars-1)/2),ncol= nvars * (nvars-1)/2) } #weights are specified in the call lower.wt <- t(t(lower.wt)/colSums(lower.wt,na.rm=TRUE)) #converting to fractions #changed 03/03/20 to weight fisher Z transformed correlations pool <- colSums(fisherz2r( lower.wt * fisherz(xvals$within)),na.rm=TRUE) pool.sd <- apply(xvals$within, 2,FUN=sd, na.rm=TRUE) xvals$pooled <- matrix(0,nvars,nvars) xvals$pooled[lower.tri(xvals$pooled)] <- pool xvals$pooled <- xvals$pooled + t(xvals$pooled) #changed, May 12 to properly reflect values diag(xvals$pooled) <- 1 xvals$sd.r <- matrix(NaN,nvars,nvars) xvals$sd.r[lower.tri(xvals$sd.r)] <- pool.sd xvals$sd.r[upper.tri(xvals$sd.r)] <- pool.sd colnames(xvals$pooled) <- rownames (xvals$pooled) <- cnames[-gr] } nvar <- ncol(data)-length(group) #we have dropped the grouping variable # if(!poly) {xvals$raw <- cor(data,use="pairwise",method=method)} else {xvals$raw <- polychoric(data)$rho} ##added 02/06/15 if (poly) cor <- "poly" switch(cor, cor = {xvals$raw <- cor(data,use=use,method=method)}, cov = {xvals$raw <- cov(data,use=use) covar <- TRUE}, poly= {xvals$raw <- polychoric(data)$rho}, tet = {xvals$raw <- tetrachoric(data)$rho}, mixed = {xvals$raw <- mixedCor(data)$rho} ) new.data <- as.matrix( merge(xvals$mean,data,by=group,suffixes =c(".bg",""))) #drop the grouping variable(s) new.data <- new.data[,(length(group)+1):ncol(new.data)] diffs <- new.data[,(nvar+1):ncol(new.data),drop=FALSE] - new.data[,1:nvar] #Difference of raw data within group - within group mean #why don't we just use scale for the data within each group? colnames(diffs) <- paste(colnames(new.data)[(nvar + 1):ncol(new.data)], ".wg", sep = "") if(nvar > 1) {xvals$rbg <- cor(new.data[,1:nvar],use="pairwise",method=method)} else {xvals$rbg <- NA} #the between group (means) nBg <- pairwise(xvals$mean)[-gr,-gr] if(any(nBg > 2)) {xvals$ci.bg <-cor.Ci(xvals$rbg,nBg,alpha=alpha,minlength=minlength)} else {xvals.ci.bg=NA} #added the test for all nBg <= 2 so we don't call cor.Ci and throw an error 11/2/19 #t <- rep(NA,(length(nG)-length(gr))) if(all(nG > 2)) { t <- (xvals$rbg*sqrt(nG[-gr]-2))/sqrt(1-xvals$rbg^2) # if(any(nG < 3) ) {#warning("Number of groups must be at least 2") xvals$pbg <- 2*(1 - pt(abs(t),(nG-2))) } else { t <- xvals$pbg <- NA } # xvals$rwg <- cor(diffs,use="pairwise",method=method) #the within group (differences) if(cor %in% c("tet","poly","mixed","mixedCor","mixed.cor") ) cor <- "cor" switch(cor, cor = {xvals$rwg <- cor(diffs,use=use,method=method)}, cov = {xvals$rwg <- cov(diffs,use=use) covar <- TRUE } ) #end of switch xvals$nw <- pairwise(diffs) rwg <- cov2cor(xvals$rwg) t <- (rwg*sqrt(xvals$nw -2))/sqrt(1-rwg^2) #changed to greater than 1, May 20, 2020 if(any(NCOL(rwg) > 1)) {xvals$ci.wg <- cor.Ci(rwg,xvals$nw,alpha=alpha,minlength=minlength)} else {xvals.ci.wg=NA} #added the test for all nBg <= 2 so we don't call cor.Ci and throw an error 11/2/19 # xvals$ci.wg <- cor.Ci(rwg,xvals$nw,alpha=alpha,minlength=minlength) #this is a local function if(all(nG > 2)) { xvals$pwg <- 2*(1 - pt(abs(t),(N[-gr] - nG[-gr] -2)))} else {xvals$pwg <- NA} # colnames(xvals$rwg) <- rownames(xvals$rwg) <- paste(colnames(xvals$rwg),".wg",sep="") xvals$etabg <- diag(cor(new.data[,1:(nvar)],new.data[,(nvar+1):ncol(new.data)],use="pairwise",method=method) )#the means with the data xvals$etawg <- diag(cor(new.data[,(nvar+1):ncol(new.data)],diffs,use="pairwise",method=method)) #the deviations and the data names(xvals$etabg) <- colnames(xvals$rbg) xvals$nWg <- nWg #this is the pairwise number of subjects xvals$nwg <- N - nG #why do we report this? Am I making a mistake xvals$nG <- nG #number of groups if(!is.null(xvals$r)) {xvals$slope <- lapply(xvals$r, function(x) x/diag(x)) ncors <- NCOL(xvals$r[[1]]) * (NCOL(xvals$r[[1]])-1)/2 xvals$slope <- matrix(unlist(lapply(xvals$slope, function(x) x[lower.tri(x), drop=FALSE])),byrow=TRUE,ncol=ncors) colnames(xvals$slope) <- colnames(xvals$within) } else { xvals$slope <- NULL} xvals$Call <- cl statsBy <- xvals class(statsBy) <- c("psych","statsBy") return(statsBy) } cor.Ci <- function(r,n,alpha=.05,minlength=10) { cl <- match.call() z.r <- fisherz(r) sd <-1/sqrt(n-3) nvar <- NCOL(r) ci.lower <- fisherz2r(z.r +( qnorm(alpha/2) * sd)) ci.upper <- fisherz2r(z.r + (qnorm(1-alpha/2) * sd)) cnR <- abbreviate(colnames(r),minlength=minlength) ci.lower <- ci.lower[lower.tri(ci.lower)] ci.upper <- ci.upper[lower.tri(ci.upper)] r.ci <- data.frame(lower=ci.lower,r=r[lower.tri(r)],upper=ci.upper) k <- 1 for(i in 1:(nvar-1)) {for (j in (i+1):nvar) { rownames(r.ci)[k] <- paste(cnR[i],cnR[j],sep="-") k <- k + 1 }} result <-list(r.ci=r.ci) class(result) <- cs(psych,corCi ) return(result) } psych/R/scaling.fits.R0000644000176200001440000000144713464305506014324 0ustar liggesusers"scaling.fits" <- function(model, data, test="logit", digits = 2,rowwise=TRUE) { model <- as.matrix(model) data <- as.matrix(data) if (test=="choice") { model <- as.vector(model) if (min(model) <= 0 ) model <- model -min(model) prob = model/(model %+% t(model)) } else { pdif <- model %+%-t(model) if (test=="logit") { prob <- 1/(1 + exp(-pdif)) } else {if (test=="normal") { prob <- pnorm(pdif) }} } if (rowwise) {prob= 1- prob} error <- data - prob sum.error2 <- sum(error^2, na.rm = TRUE) sum.data2 <- sum(data^2, na.rm = TRUE) gof <- 1 - sum.error2/sum.data2 fit <- list(GF = gof, original = sum.data2, resid = sum.error2, residual = round(error, digits)) return(fit) }psych/R/mat.sqrt.R0000644000176200001440000000035011617742153013502 0ustar liggesusers"mat.sqrt" <- function(x) { e <- eigen(x) sqrt.ev <- sqrt(e$values) #need to put in a check here for postive semi definite inv.evec <- solve(e$vectors) result <- e$vectors %*% diag(sqrt.ev) %*% inv.evec result} psych/R/thurstone.R0000644000176200001440000001226314237347547014003 0ustar liggesusers#thurstonian scaling #Thurstone case V (assumption of equal and uncorrelated error variances) #Version of March 22, 2005 #revised April 4, 2008 #Revised March 29, 2009 to be much cleaner code #Do a Thurstone scaling (Case 5) of either a square choice matrix or a rectangular rank order matrix #Need to add output options to allow users to see progress #data are either a set of rank orders or #a set of of choices (columns chosed over row) #output is a list of # scale scores #goodness of fit estimate (1 - sumsq(error)/sumsq(original) # the model choice matrix #the error (residual) matrix #the original choice matrix #modified 5/10/22 to handle case of missing data in ranks "thurstone" <- function(x, ranks = FALSE,digits=2) { #the main routine cl <- match.call() if (ranks) {choice <- choice.mat(x) #convert rank order information to choice matrix } else {if (is.matrix(x)) choice <- x choice <- as.matrix(x)} scale.values <- colMeans(qnorm(choice)) - min(colMeans(qnorm(choice))) #do the Thurstonian scaling model <- pnorm(-scale.values %+% t(scale.values)) error <- model - choice fit <- 1-(sum(error*error)/sum(choice*choice)) result <- list(scale=round(scale.values,digits), GF= fit, choice=choice,residual=error,Call=cl) class(result) <- c("psych","thurstone") return(result)} #the functions used by thurstone (local to thurstone) #if we have rank order data, then convert them to a choice matrix #convert a rank order matrix into a hoice matrix #needs to handle missing data fixed 5/10/22 orders.to.choice <- function(x,y) { #does one subject (row) at a time nvar <- length(x) #changed to length 11/8/20 for (i in 1:nvar) { for (j in 1:nvar) { if(!is.na(x[j]) & !is.na(x[i])) { if (x[j]< x[i] ) {y[i,j] <- y[i,j]+1 } else if (x[j] == x[i]) { y[j,i] <- y[j,i]+.5 #take ties into account y[i,j] <- y[i,j] + .5 } else y[j,i] <- y[j,i]+1 }}} return(y)} # makes repeated calls to orders.to.choice -- can we vectorize this? choice.mat <-function(x) { nsubs<- dim(x)[1] nvar <- dim(x)[2] y <- matrix(0,ncol=nvar,nrow=nvar) for (k in 1:nsubs) { y <-orders.to.choice(x[k,],y) } #is there a way to vectorize this? d <- diag(y) y<- y/(2*d) lower <- 1/(4*nsubs) #in case of 0 or 1, we put limits upper <- 1- lower y[yupper] <- upper return(y) } #irt type data #subjects endorse or do not endorse an item item.to.choice <- function(x) { nsubs<- dim(x)[1] nvar <- dim(x)[2] count <- t(x) %*% x dx <- diag(count) y <- dx - count diag(y) <- dx y <- y/nsubs for (k in 1:nsubs) { temp <- x[k,] for (i in 1:nvar) { for (j in 1:nvar) { if(temp[j]>0) count [i,j] <- count[i,j]+ temp[i] } } } #is there a way to vectorize this? d <- diag(y) y<- y/(2*d) lower <- 1/(4*nsubs) #in case of 0 or 1, we put limits upper <- 1- lower y[yupper] <- upper return(y) } #do hare ranking hare <- function(x, n=1) { #loop over candidates #find the person(s) with the fewest 1 place votes while(NCOL(x) > n) { #for(iter in (1:(NCOL(x)-n))) { #continue until n +1 are left count.first <- colSums(apply(x,2,function(x) x==1), na.rm=TRUE) min.count <- min(count.first) which.min <- which(count.first == min.count) which.min <- which.min[1] #just do one candidate at a time #reallocate their votes who.min <- which(x[,which.min]==1, arr.ind=TRUE) #the row is the person we want if(NROW(who.min )> 0) { if(!is.null(dim(who.min))) who.min <- who.min[,1] for(i in (1:length(who.min))) { x[who.min[i],] <- x[who.min[i],] -1} } #reallocate their votes x <- x[,-which.min,drop=FALSE] #drop the lowest vote getter #do it again until the NCOL = the number we want } if(NCOL(x) > 1){ count.first <- colSums(apply(x,2,function(x) x==1), na.rm=TRUE) result <- sort(count.first,decreasing=TRUE)[1:n]} else {result <- colSums(x==1,na.rm=TRUE) } return(result)} #show the winners #do hare ranking a different way hare2 <- function(x, n=1) { #loop over candidates #find the person(s) with the fewest 1 place votes while(NCOL(x) > n) { #for(iter in (1:(NCOL(x)-n))) { #continue until n +1 are left temp <- apply(x,1,function(x) which(x==1)) tt <- table(unlist(temp)) min.tt <- min(tt) which.min <- which(tt==min.tt) who.min <- which(x[,which.min]==1, arr.ind=TRUE) #the row is the person we want if(NROW(who.min )> 0) { if(!is.null(dim(who.min))) who.min <- who.min[,1] for(i in (1:length(who.min))) { x[who.min[i],] <- x[who.min[i],] -1} } #reallocate their votes x <- x[,-which.min,drop=FALSE] #drop the lowest vote getter #do it again until the NCOL = the number we want } if(NCOL(x) > 1){ count.first <- colSums(apply(x,2,function(x) x==1), na.rm=TRUE) result <- sort(count.first,decreasing=TRUE)[1:n]} else {result <- colSums(x==1,na.rm=TRUE) } return(result)} #show the winn # count first place votes psych/R/omega.bifactor.R0000644000176200001440000000415011631565315014612 0ustar liggesusers"omega.bifactor" <- function(r,nfactors=3,rotate="bifactor",n.obs = NA,flip=TRUE,key=NULL,title="Omega from bifactor",...) { cl <- match.call() if(dim(r)[2] != dim(r)[1]) {n.obs <- dim(r)[1] r <- cor(r,use="pairwise")} nvar <- dim(r)[2] if(is.null(colnames(r))) { rownames(r) <- colnames(r) <- paste("V",1:nvar,sep="") } r.names <- colnames(r) if (!is.null(key)) { r <- diag(key) %*% r %*% diag(key) colnames(r) <- r.names #flip items if we choose to do so flip <- FALSE #we do this if we specify the key } else {key <- rep(1,nvar) } f <- fa(r,nfactors=nfactors,rotate=rotate,n.obs = n.obs) if (flip) { #should we think about flipping items ? key <- sign(f$loadings[,1]) key[key==0] <- 1 # a rare and weird case where the gloading is 0 and thus needs not be flipped if (sum(key) < nvar) { #some items have negative g loadings and should be flipped r <- diag(key) %*% r %*% diag(key) #this is just flipping the correlation matrix so we can calculate alpha f$loadings <- diag(key) %*% f$loadings signkey <- strtrim(key,1) signkey[signkey=="1"] <- "" r.names <- paste(r.names,signkey,sep="") colnames(r) <- rownames(r) <- r.names rownames(f$loadings) <- r.names } } Vt <- sum(r) #find the total variance in the scale Vitem <- sum(diag(r)) gload <- f$loadings[,1] gsq <- (sum(gload))^2 uniq <- nvar - tr(f$loadings %*% t(f$loadings)) om.tot <- (Vt-uniq)/Vt om.limit <- gsq/(Vt-uniq) alpha <- ((Vt-Vitem)/Vt)*(nvar/(nvar-1)) sum.smc <- sum(smc(r)) lambda.6 <- (Vt +sum.smc-sum(diag(r)))/Vt omega_h= gsq/Vt omega <-list(omega_h= omega_h,alpha=alpha,G6 = lambda.6,omega.tot =om.tot ,key = key,title=title,f=f) class(omega) <- c("psych","bifactor") return(omega) } psych/R/validity.R0000644000176200001440000000524714233542214013561 0ustar liggesusers#Developed 06/24/21 to find mean item validities # x is the set of items # criteria is a single or vector of variables to correlate with the items # keys is a keys.list of scoring keys used to find the scales validityItem <- item.validity <- function(x,criteria,keys) { #first find the correlation of the items with the criteria n.keys <- length(keys) nvar <- NCOL(criteria) key.n <- names(keys) select <- lapply(keys,selectFromKeys) valid <- matrix(NA,nrow=n.keys,ncol=nvar) for(i in 1:n.keys) { list.i <- unlist(keys[[i]]) pos <- rep(1,length(keys[[i]])) neg <- grep("-",list.i) if(!is.null(neg)) pos [neg] <- -1 if(NCOL(criteria)==1) {valid[i,1] <- mean(cor(x[,select[[i]]],criteria,use="pairwise")*pos,na.rm=TRUE) } else { for (j in 1:NCOL(criteria)) { valid[i,j] <- mean(cor(x[,select[[i]]],criteria[j],use="pairwise")*pos,na.rm=TRUE) } } } rownames(valid)<- key.n colnames(valid) <- colnames(criteria) return(valid) } #this combines reliability and predicted validity to produce interesting results predicted.validity <- function(x,criteria,keys,scale.rel=NULL,item.val =NULL) { cl <- match.call() n.keys <- length(keys) asymp <- predicted <- matrix(NA,ncol=NCOL(criteria),nrow=n.keys) #If we provide these, we don't need to find them if(is.null(item.val)) item.val <- validityItem(x,criteria,keys) if(is.null(scale.rel)) scale.rel <- reliability(keys,x) for(i in 1:n.keys) { n.item <- scale.rel$result.df[i,"n.items"] predicted[i,] <- item.val[i,] * n.item /sqrt(n.item + n.item * (n.item -1) * scale.rel$result.df[i,"mean.r"]) asymp[i, ] <- item.val[i,] /sqrt(scale.rel$result.df[i,"mean.r"]) } rownames(predicted) <- rownames(asymp) <- names(keys) colnames(predicted) <- colnames(asymp) <-colnames(item.val) result <- list(predicted=predicted,item.validities=item.val,scale.reliabilities = scale.rel, asymptotic = asymp, Call=cl) class(result) <- c("psych","validity") return(result) } cd.validity<- function(d,keys,abs=TRUE) { #cd must be returned from Cohen.d if(inherits(d, "cohen.d")) { cd <- d$cohen.d[,"effect",drop=FALSE]} else {cd <- as.matrix(d)} #first find the correlation of the items with the criteria #cd <- cd$cohen.d[,"effect",drop=FALSE] n.keys <- length(keys) key.n <- names(keys) select <- lapply(keys,selectFromKeys) valid <- matrix(NA,nrow=n.keys,ncol=1) for(i in 1:n.keys) { list.i <- unlist(keys[[i]]) pos <- rep(1,length(keys[[i]])) neg <- grep("-",list.i) if(!is.null(neg)) pos [neg] <- -1 if(all(select[[i]] %in% rownames(cd))) {valid[i,] <- mean(cd[select[[i]],"effect"]*pos,na.rm=TRUE) } else {valid[i] <- NA} } rownames(valid)<- key.n #colnames(valid) <- colnames(criteria) return(valid) }psych/R/mediate.r0000644000176200001440000011166114551022300013372 0ustar liggesusers#The basic logic is to find the regression of Y on X, of M on X, and of Y on M #and then compare the direct (Y on X) also known as c to c - ab #Modified May, 2015 to allow multiple Xs (and multiple Ys) #Revised June, 2015 for prettier output #Revised February 2016 to get moderation to work #The moderate part is a bit more complicated and needs to be cleaned up #Three potential cases of moderate #a) moderation with out mediation -- not yet treated #b) moderation of the independent to mediator #c) moderation of the mediator to dependent -- not yet treated # #substantially revised May 6, 2016 by using matReg to make simpler to understand #and November,2017 improved to handle formula input and do more moderations #added the zero option to match Hayes Process #added the ability to partial out variables #12/1/18 Added the ability to do quadractic terms (similar to setCor) "mediate" <- function(y,x,m=NULL, data, mod=NULL, z=NULL, n.obs=NULL,use="pairwise",n.iter=5000,alpha=.05,std=FALSE,plot=TRUE,zero=TRUE,part=FALSE,main= "Mediation") { #this first part just gets the variables right, depending upon the way we input them cl <- match.call() #convert names to locations #first, see if they are in formula mode if(inherits(y,"formula")) { ps <- fparse(y) y <- ps$y x <- ps$x m <- ps$m #but, mediation is not done here, so we just add this to x # if(!is.null(med)) x <- c(x,med) #not necessary, because we automatically put this in mod <- ps$prod ex <- ps$ex #the quadratic term #but, we want to drop the m variables from x x <- x[!ps$x %in% ps$m] z <- ps$z #are there any variables to partial } else {ex <- NULL} all.ab <- NULL #preset this in case we are just doing regression if(is.numeric(y )) y <- colnames(data)[y] if(is.numeric(x )) x <- colnames(data)[x] if(!is.null(m)) if(is.numeric(m )) m <- colnames(data)[m] if(!is.null(mod) ) {if(is.numeric(mod)) {nmod <- length(mod) #presumably 1 mod <- colnames(data)[mod] } } if(is.null(mod)) {nmod<- 0} else {nmod<- length(mod)} var.names <- list(IV=x,DV=y,med=m,mod=mod,z=z,ex=ex) # if(any(!(unlist(var.names) %in% colnames(data)))) {stop ("Variable names not specified correctly")} if(any( !(unlist(var.names) %in% colnames(data)) )) { select <- unlist(var.names) cat("\nOops! Variable names are incorrect. Offending items are ", select[which(!(select %in% colnames(data)))],"\n") stop("I am stopping because the variable names are incorrect. See above.")} if(ncol(data) == nrow(data)) {raw <- FALSE if(nmod > 0) {stop("Moderation Analysis requires the raw data") } else {data <- data[c(y,x,m,z),c(y,x,m,z)]} } else { data <- data[,c(y,x,m,z,ex)] } # if(nmod > 0 ) {data <- data[,c(y,x,mod,m)] } else {data <- data[,c(y,x,m)]} #include the moderation variable if(nmod==1) {mod<- c(x,mod) nmod <- length(mod) } #first make sure the data are all numeric #otherwise we choke on factor data data <- char2numeric(data, flag=FALSE) #added 01/14/24 if(!is.matrix(data)) data <- as.matrix(data) if((dim(data)[1]!=dim(data)[2])) {n.obs=dim(data)[1] #this does not take into account missing data if(!is.null(mod)) if(zero) data[,c(x,m,z,ex)] <- scale(data[,c(x,m,z,ex)],scale=FALSE) #0 center but not the dv C <- cov(data,use=use) raw <- TRUE if(std) {C <- cov2cor(C)} #use correlations rather than covariances } else { raw <- FALSE C <- data nvar <- ncol(C) if(is.null(n.obs)) {n.obs <- 1000 message("The data matrix was a correlation matrix and the number of subjects was not specified. \n n.obs arbitrarily set to 1000") } if(!is.null(m)) { # only if we are doing mediation (12/11/18) message("The replication data matrices were simulated based upon the specified number of subjects and the observed correlation matrix.") eX <- eigen(C) #we use this in the bootstrap replications in the case of a correlation matrix data <- matrix(rnorm(nvar * n.obs),n.obs) data <- t( eX$vectors %*% diag(sqrt(pmax(eX$values, 0)), nvar) %*% t(data) ) colnames(data) <- c(y,x,m,z)} } if ((nmod > 0 ) | (!is.null(ex))) {if(!raw) {stop("Moderation analysis requires the raw data") } else { if(zero) {data <- scale(data,scale=FALSE)} } } if (nmod > 0 ) { prods <- matrix(NA,ncol=length(ps$prod),nrow=nrow(data)) colnames(prods) <- paste0("V",1:length(ps$prod)) for(i in 1:length(ps$prod)) { prods[,i] <- apply(data[,ps$prod[[i]]],1,prod) colnames(prods)[i] <- paste0(ps$prod[[i]],collapse="*") } data <- cbind(data,prods) x <- c(x,colnames(prods)) } if(!is.null(ex)) { quads <- matrix(NA,ncol=length(ex),nrow=nrow(data)) #find the quadratric terms colnames(quads) <- ex for(i in 1:length(ex)) { quads[,i] <- data[,ex[i]] * data[,ex[i]] colnames(quads)[i] <- paste0(ex[i],"^2") } data <- cbind(data,quads) x <- c(x,colnames(quads)) } #We have now added in products and quadratics, if desired if(raw) {C <- cov(data,use=use) } #else {C <- data} if(std) { C <- cov2cor(C)} ######### ######### #now, we are ready to process the data #We do the basic regressions as matrix operations using matReg xy <- c(x,y) numx <- length(x) numy <- length(y) if(!is.null(m)) {numm <- length(m) nxy <- numx + numy m.matrix <- C[c(x,m),c(x,m),drop=FALSE] #this is the predictor + moderator matrix } else {numm <- 0 nxy <- numx} #the case of moderation without mediation df <- n.obs - nxy - 1 xy.matrix <- C[c(x,m),y,drop=FALSE] #this is the matrix of correlations with the criterion #this next section adds the intercept to the regressions C.int <- matrix(NA,nrow=NROW(C)+1,ncol=ncol(C)+1) C.int[1,] <- C.int[,1] <- 0 C.int[-1,-1] <- C if(!raw) { std <- TRUE C.int[1,1] <- 1 means <- rep(0,NCOL(data))} else { C.int[1,1] <- 0 if(!std){ means <- colMeans(data,na.rm=TRUE)} else { means <- rep(0,ncol(C))} means <- c(1,means) C.int <- C.int * (n.obs-1) + means %*% t(means) * n.obs } rownames(C.int) <- colnames(C.int) <- c("Intercept",colnames(C)) if(std) { C.int <- cov2cor(C.int) } #first, find the complete regression model cprime.reg <- matReg(c("Intercept",x,m),y,C=C.int,n.obs=n.obs,z=z,means=means,std=std,raw=raw,part=part) #we have added the intercept term here (11/25/19) ##this is the zero order beta -- the total effect # total.reg <- matReg(x,y,m=m,z=z,C=C,n.obs=n.obs,std=std,part=part) #include m for correct df, add in the z here to do partial correlations total.reg <- matReg(c("Intercept",x),y,m=m,z=z,C=C.int,n.obs=n.obs,std=std,part=part) #include m for correct df, add in the z here to do partial correlations direct <- total.reg$beta if(!is.null(z)) {colnames(direct) <- paste0(colnames(direct),"*") rownames(direct) <- paste0(rownames(direct),"*") } #There are 3 broad cases that need to be handled somewhat differently in terms of the matrix operations # 1 IV, 1 or more mv # multiple IV, 1 MV #multiple IV, multiple MV #this is the direct path from X to M #For the purposes of moderation, at least for now, think of this as just 2 or 3 IVs #get a, b and cprime effects and their se if(numm > 0) {a.reg <- matReg(x=c("Intercept",x),y=m,C=C.int,z=z,n.obs=n.obs,means=means,std=std,part=part) #the default case is to have at least one mediator b.reg <- matReg(c(x,m),y,C=C,z=z,n.obs=n.obs,part=part) #the df is fudged in matReg -- need to fix that # cprime.reg <- matReg(c("Intercept",x,m),y,C=C.int,n.obs=n.obs,z=z,means=means,std=std) #we have added the intercept term here (11/25/19) a <- a.reg$beta[-1,,drop=FALSE] b <- b.reg$beta[-(1:numx),,drop=FALSE] #this reports just the mediator stats c <- total.reg$beta[-1,,drop=FALSE] cprime <- cprime.reg$beta # ab <- a * b #these are the ab products for each a and b path c' is c - sum of all of these # all.ab <- matrix(NA,ncol=numm*numy,nrow=numx) #We currently only get the all.ab terms if there is just 1 dv all.ab <- matrix(NA,ncol=numm*numx,nrow=numy) # if((numx == 1) & (numy==1)) {ab <- a * t(b)} else {ab <- matrix(NA,ncol=numm*numx,nrow=numy) # for (i in 1:numx) { ab <- t(a[i,] * b[,1:numy])}} #fixed ? November 18, 2019 to handle more than 1 dv for(i in 1:numx) { if((numx == 1) & (numy==1)) {all.ab <- a * t(b)} else { all.ab <- matrix(NA,ncol=numm*numx,nrow=numy) for (i in 1:numx) { all.ab <- t(a[i,] * b[,1:numy])}} # all.ab[i,] <- a[i,] * t(b[,1]) } #just do the first column of b (this is problematic, perhaps and doesn't work for two dependent variables) # colnames(all.ab) <- rep(m, numy) # colnames(all.ab) <- m # rownames(all.ab) <- x ab <- a %*% b #are we sure that we want to do the matrix sum? indirect <- c - ab if(is.null(n.obs)) {message("Bootstrap is not meaningful unless raw data are provided or the number of subjects is specified.") mean.boot <- sd.boot <- ci.quant <- boot <- se <- tvalue <- prob <- NA } else { # if(is.null(mod)) { boot <- p.boot.mediate(data,x,y,m,z,n.iter=n.iter,std=std,use=use) #this returns a list of vectors #the first values are the indirect (c') (directly found), the later values are c-ab from the products #we have now dropped those extra values #this is a mistake if(numy==1) {colnames(boot) <- c(x,paste0(rep(m,each=length(x)),"*",x))} else { cn <- c(x,paste0(rep(m,each=length(x)),"*",x)) shorter.cn <- paste0(rep(m,each=length(x)),"*",x) long.cn <- short.cn <- NULL for(tem in 1:numy) {long.cn <- c(long.cn,paste0(y[tem],cn)) short.cn <- c(short.cn,paste0(y[tem],shorter.cn))} colnames(boot) <- long.cn } #if(ncol(boot)== length(c(x,m) )) colnames(boot) <- c(x,m) # if(length(y) ==1) {boot <- boot[, c(paste0(rep(m,each=length(x)),"*",x)),drop=FALSE]} else { # boot <- boot[,short.cn]} #this picks the mediate variables #except we actually want to keep the indirect value mean.boot <- colMeans(boot) sd.boot <- apply(boot,2,sd) ci.quant <- apply(boot,2, function(x) quantile(x,c(alpha/2,1-alpha/2),na.rm=TRUE)) # mean.boot <- matrix(mean.boot[1:(numx*numy)],nrow=numx) mean.boot <- matrix(mean.boot,nrow=numx) # sd.boot <- matrix(sd.boot[1:(numx*numy)],nrow=numx) sd.boot <- matrix(sd.boot,nrow=numx) # ci.ab <- matrix(ci.quant,nrow=2*numx) ci.ab <- ci.quant # colnames(mean.boot) <- colnames(sd.boot) <- c(y,m) rownames(mean.boot) <- rownames(sd.boot) <- x boots <- list(mean=mean.boot,sd=sd.boot,ci=ci.quant,ci.ab=ci.ab) } } else { #the case of just an interaction term a.reg <- b.reg <- reg <- NA a <- b <- c <- ab <- cprime <- boot<- boots <- indirect <- NA} #beta.x is the effect without the mediators #direct is the effect with the mediators #indirect is ab from the difference #ab is ab from the product of a and b paths if(!is.null(z)) {var.names$IV <- paste0(var.names$IV,"*") var.names$DV <- paste0(var.names$DV,"*") var.names$med <- paste0(var.names$med,"*") colnames(C) <- rownames(C) <- paste0(colnames(C),"*") } result <- list(var.names=var.names,a=a,b=b,ab=ab,all.ab = all.ab,c=c,direct=direct,indirect=indirect,cprime = cprime, total.reg=total.reg,a.reg=a.reg,b.reg=b.reg,cprime.reg=cprime.reg,boot=boots,boot.values = boot,sdnames=colnames(data),data=data,C=C, Call=cl) class(result) <- c("psych","mediate") if(plot) {if(is.null(m)) {moderate.diagram(result) } else { mediate.diagram(result,main=main) } } return(result) } #a helper function to find regressions from covariances #May 6, 2016 #Fixed November 29, 2018 to handle se of partialed variables correctly #modified September 25, 2019 to find intercepts and standard errors using momements #modified even more November 25, 2019 to return the intercepts and R2 #correct May 31, 2021 to correctly estimate dfs with and without intercept terms (finally) matReg <- function(x,y,C,m=NULL,z=NULL,n.obs=0,means=NULL,std=FALSE,raw=TRUE,part=FALSE) { if(is.null(n.obs)) n.obs <- 0 numx <- length(x) #this is the number of predictors (but we should adjust by the number of covariates) numz <- length(z) numy <- length(y) #df <- n.obs -1 - numx - length(z) - length(m) #but this does not take into account the mediating variables #note that the x variable includes the intercept and thus uses up one extra df # # df <- n.obs - numx -numz #We have partialed out z, should we use the df from it? This is changed 11/26/19 to reduce df for z # This df is correct if we are calculating the intercept but is off if we we are not if("Intercept" %in% colnames(C)) { df <- n.obs - numx -numz } else {df <- n.obs -1 - numx -numz } #05/31/21 Cr <- cov2cor(C) if(!is.null(z)){numz <- length(z) #partial out the z variables zm <- C[z,z,drop=FALSE] za <- C[x,z,drop=FALSE] zb <- C[y,z,drop=FALSE] zmi <- solve(zm) x.matrix <- C[x,x,drop=FALSE] - za %*% zmi %*% t(za) if(!part) y.matrix <- C[y,y,drop=FALSE] - zb %*% zmi %*% t(zb) #part versus partial (default) xy.matrix <- C[x,y,drop=FALSE] - za %*% zmi %*% t(zb) C <- cbind(rbind(y.matrix,xy.matrix),rbind(t(xy.matrix),x.matrix)) } if(numx==1) { beta <- solve(C[x,x,drop=FALSE],(C[x,y,drop=FALSE])) colnames(beta) <- y } else { beta <- solve(C[x,x],(C[x,y])) } #this is the same as setCor and is a x * x matrix if(!is.matrix(beta)) {beta <- matrix(beta,nrow=length(beta))} #beta is a matrix of beta weights if(is.character(x)) {rownames(beta) <- x} else {rownames(beta) <- colnames(C)[x]} if(is.character(y)) { colnames(beta) <- y} else { colnames(beta) <- colnames(C)[y]} x.inv <- solve(C[x,x]) #solve x.matrix #taken from setCor yhat <- t(C[x,y,drop=FALSE]) %*% x.inv %*% C[x,y,drop=FALSE] resid <- C[y,y]- yhat if(!std ) { # df <- n.obs - numx - numz Residual.se <- sqrt(diag(resid /df)) #this is the df n.obs - length(x)) se <- MSE <- diag(resid )/(df) if(length(y) > 1) {SST <- diag(C[y,y] - means[y]^2 * n.obs)} else {SST <- ( C [y,y] - means[y]^2 * n.obs)} R2 <- (SST - diag(resid)) /SST se.beta <- list() for (i in 1:length(y)) { se.beta[[i]] <- sqrt(MSE[i] * diag(x.inv)) } se <- matrix(unlist(se.beta),ncol=numy) if(length(y) > 1) {SST <- diag(C [y,y] - means[y]^2 * n.obs)} else {SST <- ( C [y,y] - means[y]^2 * n.obs)} R2 <- (SST - diag(resid)) /SST } else { R2 <- colSums(beta * C[x,y])/diag(C[y,y,drop=FALSE]) #the standardized case uniq <- 1-(1-1/diag(solve(Cr[x,x,drop=FALSE]))) #1- smc if(n.obs > 2) { # se <- (sqrt((1-R2)/(n.obs-1 - numx-numz)) %*% t(sqrt(1/uniq))) #these are the standardized se se <- (sqrt((1-R2)/(df)) %*% t(sqrt(1/uniq))) #setCor uses df = n.obs - numx - 1 se <- t( se * sqrt(diag(C[y,y,drop=FALSE])) %*% t(sqrt(1/diag(C[x,x,drop=FALSE]))) ) #But does this work in the general case? colnames(se) <- colnames(beta) } else {se <- NA} # if(raw) { #used to compare models -- we need to adjust this for dfs # Residual.se <- sqrt((1-R2)* df/(df-1)) } else { #this was a kludge which was necessary to treat the SSR correctly -- probably because I was off on df Residual.se <- sqrt((1-R2)/df * (n.obs-1)) #} } if(!any(is.na(se))) { tvalue <- beta/se # prob <- 2*(1- pt(abs(tvalue),df)) prob <- -2 * expm1(pt(abs(tvalue),df,log.p=TRUE)) } else {tvalue <- prob <- df <- NA} result <- list(beta=beta,se=se, t=tvalue,df=df,prob=prob,R2=R2,SE.resid=Residual.se) return(result) } ####### partialReg <- function(C,x,y,m,z) { x <- c(x,m) y <- c(y,m) numz <- length(z) #partial out the z variables zm <- C[z,z,drop=FALSE] za <- C[x,z,drop=FALSE] zb <- C[y,z,drop=FALSE] zmi <- solve(zm) x.matrix <- C[x,x,drop=FALSE] - za %*% zmi %*% t(za) y.matrix <- C[y,y,drop=FALSE] - zb %*% zmi %*% t(zb) xy.matrix <- C[x,y,drop=FALSE] - za %*%zmi %*% t(zb) C <- cbind(rbind(y.matrix,xy.matrix),rbind(t(xy.matrix),x.matrix)) } #finally fixed November 3, 2019 boot.mediate <- function(data,x,y,m,z,n.iter=10,std=FALSE,use="pairwise") { n.obs <- nrow(data) numx <- length(x) numy <- length(y) numm <- length(m) nxy <- numx + numy result <- matrix(NA,nrow=n.iter,ncol = (numx*numy+ numm*numy*numx )) if((numm > 1) & (numx > 1)) ab <- matrix(0,nrow=numx,ncol=numy) for (iteration in 1:n.iter) { samp.data <- data[sample.int(n.obs,replace=TRUE),] C <- cov(samp.data,use=use) if(!is.null(z)) C <- partialReg(C,x,y,m,z) #partial out z if(std) C <- cov2cor(C) xy <- c(x,y) m.matrix <- C[c(x,m),c(x,m)] # df <- n.obs - nxy - 1 xy.matrix <- C[c(x,m),y,drop=FALSE] if(numx ==1) { beta.x <- solve(C[x,x],t(C[x,y]) ) } else {beta.x <- solve(C[x,x],C[x,y]) } #this is the zero order beta -- the total effect # if(numx ==0) { a <- solve(C[x,x,drop=FALSE],t(C[x,m,drop=FALSE]) ) } else { a <- solve(C[x,x,drop=FALSE],(C[x,m,drop=FALSE]) )} #the a paths a <- solve(C[x,x,drop=FALSE],(C[x,m,drop=FALSE])) beta.xm <- solve(m.matrix,xy.matrix) #solve the equation bY~aX beta <- as.matrix(beta.xm) #these are the individual predictors, x and then m b <- beta[-c(1:numx),,drop=FALSE] if((numx == 1) & (numy==1)) {ab <- a[-1] * t(b)} else {ab <- array(NA,dim=c(numx,numm,numy)) for(j in 1:numy) { for(k in 1:numm) { for (i in 1:numx) { ab[i,k,j] <- t(a[i,k] * b[k,j])}} #this needs to be fixed for two numm>1 }} # ab <- a %*% b #each individual path #this probably is only correct for the numx = 1 model # if((numx > 1) & (numy > 1)) {for (i in 1:numx) {ab[i,] <- a[i,] * b}} # ab <- a * b #we don't really need this #all.ab <- matrix(NA,ncol=numm*numy,nrow=numx*numm) #The number of all.ab terms is ( numx * numm) * (numm * numy) # for(j in 1:numm*numy) { # for(i in 1:numx*numm) {all.ab[i,j] <- a[i,] * t(b[j,i])} #this just does one column of b # #consider muliple ivs # all.ab <-outer(a,t(b)) #this is cute,but actually not correct #all.ab <- matrix(all.ab,nrow=numx*numm,ncol=numm*numy) all.ab <- ab indirect <- beta.x - beta[1:numx,1:numy] #this is c' = c - ab # result[iteration,] <- c(indirect,ab) #this is a list of vectors result[iteration,] <- c(indirect, all.ab) #this is a list of vectors -- do we really need all all.ab since it is identical to indirect? } return(result) } short.boot <- function(i,data,n.obs,x,y,m,z,std,use=use ){ numx <- length(x) numy <- length(y) numm <- length(m) nxy <- numx + numy samp.data <- data[sample.int(n.obs,replace=TRUE),] C <- cov(samp.data,use=use) if(!is.null(z)) C <- partialReg(C,x,y,m,z) #partial out z if(std) C <- cov2cor(C) xy <- c(x,y) m.matrix <- C[c(x,m),c(x,m)] # df <- n.obs - nxy - 1 xy.matrix <- C[c(x,m),y,drop=FALSE] if(numx ==1) { beta.x <- solve(C[x,x],t(C[x,y]) ) } else {beta.x <- solve(C[x,x],C[x,y]) } #this is the zero order beta -- the total effect # if(numx ==0) { a <- solve(C[x,x,drop=FALSE],t(C[x,m,drop=FALSE]) ) } else { a <- solve(C[x,x,drop=FALSE],(C[x,m,drop=FALSE]) )} #the a paths a <- solve(C[x,x,drop=FALSE],(C[x,m,drop=FALSE])) beta.xm <- solve(m.matrix,xy.matrix) #solve the equation bY~aX beta <- as.matrix(beta.xm) #these are the individual predictors, x and then m b <- beta[-c(1:numx),,drop=FALSE] if((numx == 1) & (numy==1)) {ab <- a * t(b)} else {ab <- array(NA,dim=c(numx,numm,numy)) #was a[-1] for(j in 1:numy) { for(k in 1:numm) { for (i in 1:numx) { ab[i,k,j] <- t(a[i,k] * b[k,j])}} #this needs to be fixed for two numm>1 }} # ab <- a %*% b #each individual path #this probably is only correct for the numx = 1 model # if((numx > 1) & (numy > 1)) {for (i in 1:numx) {ab[i,] <- a[i,] * b}} # ab <- a * b #we don't really need this #all.ab <- matrix(NA,ncol=numm*numy,nrow=numx*numm) #The number of all.ab terms is ( numx * numm) * (numm * numy) # for(j in 1:numm*numy) { # for(i in 1:numx*numm) {all.ab[i,j] <- a[i,] * t(b[j,i])} #this just does one column of b # #consider muliple ivs # all.ab <-outer(a,t(b)) #this is cute,but actually not correct #all.ab <- matrix(all.ab,nrow=numx*numm,ncol=numm*numy) all.ab <- ab indirect <- beta.x - beta[1:numx,1:numy] #this is c' = c - ab # result[iteration,] <- c(indirect,ab) #this is a list of vectors result <- c(indirect, all.ab) #this is a list of vectors -- do we really need all all.ab since it is identical to indirect? return(result) } p.boot.mediate <- function(data,x,y,m,z,n.iter=10,std=FALSE,use="pairwise") { n.obs <- nrow(data) numx <- length(x) numy <- length(y) numm <- length(m) nxy <- numx + numy result <- matrix(NA,nrow=n.iter,ncol = (numx*numy+ numm*numy*numx )) if((numm > 1) & (numx > 1)) ab <- matrix(0,nrow=numx,ncol=numy) p.result <- list() #mapply to debug p.result <- t(mcmapply(short.boot,c(1:n.iter),MoreArgs=list(data,n.obs=n.obs,x=x,y=y,m=m,z=z,std=std,use=use))) #added the transpose 10/03/20 return(p.result) } "mediate.diagram" <- function(medi,digits=2,ylim=c(3,7),xlim=c(-1,10),show.c=TRUE, main="Mediation model",cex=1,l.cex=1,...) { if(missing(l.cex)) l.cex <- cex dv <- medi$var.names[["DV"]] # iv <- medi$var.names[["IV"]] iv <- as.matrix(rownames(medi$direct)) if(iv[1] =="Intercept") {iv <- iv[-1] direct <- round(medi$direct[-1,,drop=FALSE],digits)} else { direct <- round(medi$direct,digits)} mv <- medi$var.names[["med"]] mod <- medi$var.names[["mod"]] # numx <- length(medi$var.names[["IV"]]) numy <- length(dv) # direct <- round(medi$direct,digits) if(!("Intercept*" %in% rownames(C))) {iv <- as.matrix(rownames(medi$direct)[-1])} numx <- NROW(iv) C <- round(medi$C[c(iv,mv,dv),c(iv,mv,dv)],digits) #if have moderated effects, this is the same as more xs miny <- 5 - max(length(iv)/2,length(mv),2) - .5 maxy <- 5 + max(length(iv)/2,length(mv),2) + .5 if(missing(xlim)) xlim=c(-numx * .67,10) if(missing(ylim)) ylim=c(miny,maxy) plot(NA,xlim=xlim,ylim=ylim,main=main,axes=FALSE,xlab="",ylab="") var.names <- c(rownames(medi$direct),colnames(medi$direct),rownames(medi$b)) if(is.null(mv)) {n.mediate <- 0} else {n.mediate <- length(mv)} m <- list() arrow.list <- list() rect.list <- list() #c <- as.matrix(round(medi$total,digits)) c <- as.matrix(round(medi$c,digits)) a <- as.matrix(round(medi$a,digits)) if(ncol(a)==1) a <- t(a) b <- as.matrix(round(medi$b,digits)) cprime <- as.matrix(round(medi$cprime,digits)) x <- list() if((numx > 1) && (n.mediate > 1) ) {adj <- 3} else {adj <- 2} #this fixes where to put the labels on the a path viv <- 1:numx for(i in 1:numx) { if((numx %% 2)== 0) { viv[i] <- switch(i,7,3,6,4,8,2,9,1,10) } else { viv[i] <- switch(i,5,7,3,6,4,8,2,9)} x[[i]] <- dia.rect(1,viv[i],iv[i],cex=cex,draw=FALSE,...) rect.list <- c(rect.list,iv[i],x[[i]])} vdv <- 1:numy y <- list() for (i in 1:numy) { if((numy %% 2)== 0) { vdv[i] <- switch(i,6,4,7,3,8,2,9,1,10) } else { vdv[i] <- switch(i,5,7,3,6,4,8,2,9)} y[[i]] <- dia.rect(9,vdv[i],dv[i],cex=cex,draw=FALSE,...) rect.list <- c(rect.list,dv[i],y[[i]]) } #y <- dia.rect(9,5,dv) v.loc <- 1:n.mediate if(n.mediate > 0) { for (mediate in 1:n.mediate) { if((n.mediate %% 2) ==0) {v.loc[mediate] <- switch(mediate,7,3,9,1,6,4,7,3,10) } else { switch(numx, 1: {v.loc[mediate] <- switch(mediate,7,3,8,1,6,4,9,2)}, 2 : {v.loc[mediate] <- switch(mediate,5,3,7,2,6,4,8,2)}, 3: {v.loc[mediate] <- switch(mediate,5.5,3,7,2,5,4,8,2)}, 4: {v.loc[mediate] <- switch(mediate,5,3,7,2,6,4,8,2)}, 5: {v.loc[mediate] <- switch(mediate,6,3,7,2,5,4,8,2)}, 6: {v.loc[mediate] <- switch(mediate,5,3,7,2,6,4,8,2)}, 7: {v.loc[mediate] <- switch(mediate,6,3,7,2,5,4,8,2)}) } } } v.loc <- sort(v.loc,decreasing=TRUE) if(n.mediate ==0) { for(j in 1:numy) { for(i in 1: numx) { d.arrow <- dia.arrow(x[[i]]$right,y[[j]]$left,labels=paste("c = ",direct[i,j]),pos=NULL,cex=l.cex,draw=FALSE,...) arrow.list <- c(arrow.list,d.arrow)} } } else { if(n.mediate==1) a <- t(a) for (mediate in 1:n.mediate) { m[[mediate]] <- dia.rect(5,v.loc[mediate],mv[mediate],cex=cex,draw=FALSE,... ) rect.list <- c(rect.list,mv[mediate],m[[mediate]]) for(j in 1:numy) { for(i in 1: numx) {d.arrow <- dia.arrow(x[[i]]$right,m[[mediate]]$left,labels=a[i,mediate],adj=adj,cex=l.cex,draw=FALSE,...) arrow.list <- c(arrow.list,d.arrow) #a term if(show.c) {d.arrow <- dia.arrow(x[[i]]$right,y[[j]]$left,labels=paste("c = ",c[i,j]),pos=3,cex=l.cex,draw=FALSE,...) arrow.list <- c(arrow.list,d.arrow)} d.arrow <- dia.arrow(x[[i]]$right,y[[j]]$left,labels=paste("c' = ",cprime[i+1,j]),pos=1,cex=l.cex,draw=FALSE,...) arrow.list <- c(arrow.list,d.arrow)} #we have an intercept d.arrow <- dia.arrow(m[[mediate]]$right,y[[j]]$left,labels=b[mediate,j],cex=l.cex,draw=FALSE,...) arrow.list <- c(arrow.list,d.arrow) # } } } rviv <- max(viv) if(numx >1) { for (i in 2:numx) { for (k in 1:(i-1)) {dia.curved.arrow(x[[i]]$left,x[[k]]$left,C[i,k],scale=-(numx-1)*(abs(viv[i]-viv[k])/rviv),both=TRUE,dir="u",cex=l.cex,...)} } } multi.rect(rect.list,...) multi.arrow(arrow.list,...) } "moderate.diagram" <- function(medi,digits=2,ylim=c(2,8),main="Moderation model",cex=1,l.cex=1,...) { if(missing(l.cex)) l.cex <- cex xlim=c(0,10) #plot(NA,xlim=xlim,ylim=ylim,main=main,axes=FALSE,xlab="",ylab="") var.names <- rownames(medi$direct) x.names <- rownames(medi$direct) if(x.names[1] == "Intercept") {x.names <- x.names[-1] beta <- round(medi$direct[-1,,drop=FALSE],digits)} else { beta <- round(medi$direct,digits)} #fixed 10/29/20 y.names <- colnames(medi$direct) #beta <- round(medi$direct,digits) nx <- length(x.names) ny <- length(y.names) top <- max(nx,ny) xlim=c(-nx/3,10) ylim=c(0,top) top <- max(nx,ny) x <- list() y <- list() x.scale <- top/(nx+1) y.scale <- top/(ny+1) plot(NA,xlim=xlim,ylim=ylim,main=main,axes=FALSE,xlab="",ylab="",...) for(i in 1:nx) {x[[i]] <- dia.rect(2,top-i*x.scale,x.names[i],cex=cex,...) } for(j in 1: ny) {y[[j]] <- dia.rect(7,top-j*y.scale,y.names[j],cex=cex,...) } y[[1]] <- dia.rect(7,top-y.scale,y.names[1],cex=cex,...) # dia.arrow(x[[1]]$right,y[[j]]$left,labels=paste("c = ",c),pos=3,...) #dia.arrow(x[[1]]$right,y[[j]]$left,labels=paste("c' = ",cprime),pos=1,...) for(j in 1:ny){ for(i in 1:nx) { dia.arrow(x[[i]]$right,y[[j]]$left,labels = beta[i,j],adj=2,cex=l.cex,...) } } if(nx >1) { for (i in 2:nx) { for (k in 1:(i-1)) {dia.curved.arrow(x[[i]]$left,x[[k]]$left,round(medi$C[i+1,k+1],2),scale= -(abs(k-i)),both=TRUE,dir="u",cex=l.cex,...)} } } } #finally got the print to work on multiple dvs 11/24/19 "summary_psych.mediate" <- function(x,digits=2,short=FALSE) { cat("Call: ") print(x$Call) dv <- x$var.names[["DV"]] # iv <- x$var.names[["IV"]] mv <- x$var.names[["med"]] mod <- x$var.names[["mod"]] # dv <- x$names[1] iv <- rownames(x$direct)[-1] niv <- length(iv) nmed <- length(mv) ndv <- length(dv) nz <- length(x$var.names[["z"]]) if(nmed < 1) { cat("\nNo mediator specified leads to traditional regression \n") } else { cat("\nDirect effect estimates (traditional regression) (c') X + M on Y \n")} #print the traditional regression values for(j in 1:ndv) { if (niv==1) { dfd <- round(data.frame(direct=x$cprime.reg$beta[,j],se = x$cprime.reg$se[,j],t=x$cprime.reg$t[,j],df=x$cprime.reg$df),digits) dfdp <- cbind(dfd,p=signif(x$cprime.reg$prob[,j],digits=digits+1)) } else { dfd <- round(data.frame(direct=x$cprime.reg$beta[1:(niv+1+nmed),j],se = x$cprime.reg$se[1:(niv+1+nmed),j],t=x$cprime.reg$t[1:(niv+1+nmed),j],df=x$cprime.reg$df),digits) dfdp <- cbind(dfd,p=signif(x$cprime.reg$prob[1:(niv+1+nmed),j],digits=digits+1)) } colnames(dfdp) <- c(dv[j],"se","t","df","Prob") print(dfdp) F <- x$cprime.reg$df * x$cprime.reg$R2[j]/(((nrow(x$cprime.reg$beta)-1) * (1-x$cprime.reg$R2[j]))) pF <- -expm1(pf(F,nrow(x$cprime.reg$beta)-1,x$cprime.reg$df,log.p=TRUE)) cat("\nR =", round(sqrt(x$cprime.reg$R2[j]),digits),"R2 =", round(x$cprime.reg$R2[j],digits), " F =", round(F,digits), "on",nrow(x$cprime.reg$beta)-1, "and", x$cprime.reg$df,"DF p-value: ",signif(pF,digits+1), "\n") } if(nmed > 0) { cat("\n Total effect estimates (c) (X on Y) \n") for(j in 1:ndv) { dft <- round(data.frame(direct=x$total.reg$beta[,j],se = x$total.reg$se[,j],t=x$total.reg$t[,j],df=x$total.reg$df),digits) dftp <- cbind(dft,p = signif(x$total.reg$prob[,j],digits=digits+1)) colnames(dftp) <- c(dv[j],"se","t","df","Prob") rownames(dftp) <- rownames(x$total.reg$beta) print(dftp) } cat("\n 'a' effect estimates (X on M) \n") for(j in 1:ndv) { if(niv==1) { for(i in 1:nmed) { dfa <- round(data.frame(a = x$a.reg$beta[,i],se = x$a.reg$se[,i],t = x$a.reg$t[,i],df= x$a.reg$df),digits) dfa <- cbind(dfa,p=signif(x$a.reg$prob[,i],digits=digits+1)) if(NROW(dfa) ==1) {rownames(dfa) <- rownames(x$a.reg$beta) colnames(dfa) <- c(colnames(x$a.reg$beta),"se","t","df", "Prob")} else { rownames(dfa) <- rownames(x$a.reg$beta) colnames(dfa) <- c(colnames(x$a.reg$beta)[i],"se","t","df", "Prob")} print(dfa)} } else { for (i in 1:nmed) { dfa <- round(data.frame(a = x$a.reg$beta[,i],se = x$a.reg$se[,i],t = x$a.reg$t[,i],df= x$a.reg$df),digits) dfa <- cbind(dfa,p=signif(x$a.reg$prob[,i],digits=digits+1)) rownames(dfa) <-rownames(x$a.reg$beta) colnames(dfa) <- c(colnames(x$a.reg$beta)[i],"se","t","df","Prob") print(dfa) } } } cat("\n 'b' effect estimates (M on Y controlling for X) \n") for (j in 1:ndv) { if(niv==1) { dfb <- round(data.frame(direct=x$b.reg$beta[-(1:niv),j],se = x$b.reg$se[-(1:niv),j],t=x$b.reg$t[-(1:niv),j], df=x$b.reg$df),digits) dfb <- cbind(dfb,p=signif(x$b.reg$prob[-(1:niv),j],digits=digits+1))} else { dfb <- round(data.frame(direct=x$b.reg$beta[-(1:niv),j],se = x$b.reg$se[-(1:niv),j],t=x$b.reg$t[-(1:niv),j],df=x$b.reg$df),digits) dfb <- cbind(dfb,p=signif(x$b.reg$prob[-(1:niv),j],digits=digits+1))} rownames(dfb) <- rownames(x$b.reg$beta)[-(1:niv)] colnames(dfb) <- c(dv[j],"se","t","df", "Prob") print(dfb) } #not clear how this is different from next section #the indirect is correct, but the mediators (boot) need to be summed cat("\n 'ab' effect estimates (through all mediators)\n") #need to think about the number of mediators as well as ndv and niv for (j in 1:ndv) { #currently only works for ndv =1 if(niv > 1){ dfab <- round(data.frame(indirect = x$ab[,j],boot = x$boot$mean[,j], sd=x$boot$sd[,j], # lower=x$boot$ci[,((j-1)*(niv+ndv+nmed)+niv+1):((j)*(niv+ndv+nmed))], # upper=x$boot$ci[,((j-1)*(niv+ndv+nmed)+niv+1):((j)*(niv+ndv+nmed))]),digits)} else { lower=x$boot$ci[1,j], upper=x$boot$ci[2,j]),digits)} else { dfab <-round(data.frame(indirect = x$ab[,j],boot = x$boot$mean[,j] ,sd=x$boot$sd[,j], # # lower=x$boot$ci[1,1:niv], #was niv perhaps should be ndv? # # upper=x$boot$ci[2,1:niv]),digits) lower=x$boot$ci[1,(1:niv + niv*(j-1))], upper=x$boot$ci[2,(1:niv + niv*(j-1))]),digits) # # lower=x$boot$ci[1,(j*niv )], # # upper=x$boot$ci[2,(j*niv )]),digits) # dfab <- round(data.frame(indirect = x$ab[,j],boot = sum(x$boot$mean[,1:(j*niv+1)])),digits)} } rownames(dfab) <- rownames(x$ab) colnames(dfab)[1] <- dv[j] print(round(dfab,digits)) } # } #now show the individual ab effects (just works for 1 dv) #rownames(x$boot$ci) <- rownames(x$boot$mean) <- rownames(x$boot$sd ) <- NULL #problem when ndv > 1 for(k in 1: ndv) { if(nmed > 1) { cat("\n 'ab' effects estimates for each mediator for",colnames(x$ab)[k], "\n") #rows of x$boot$mean are IVs #columns of x$boot$mean are g for each DV dfab <- round(data.frame(boot=as.vector(x$boot$mean),sd=as.vector(x$boot$sd), lower=x$boot$ci[1,], upper =x$boot$ci[2,]), digits) # for (j in 1:nmed) { # dfab <-round(data.frame(#indirect = x$all.ab[,j], # boot = as.vector(x$boot$mean[,(k*(j-1)+1):(k*j)]),sd=as.vector(x$boot$sd[,(k*(j-1)+1):(k*j)]), # #lower=x$boot$ci[1,(j*niv*k +1):(j*niv*k +niv)], # #upper=x$boot$ci[2,(j*niv*k +1):(j*niv*k +niv)]),digits) # lower=x$boot$ci[1,(1:(j*niv*k ))], # upper=x$boot$ci[2,(1:(j*niv*k))]),digits) # # #rownames(dfab) <- colnames(x$boot$ci) # # colnames(dfab)[1] <- mv[j] # } print(round(dfab,digits)) #now, if number of mediators >1, compare them } } #not clear what this is supposed to do # if(nmed * niv * ndv > 1) { # cat("\n Compare the ab estimates for each mediator \n") # print(round(compare.boot(x$boot.values),digits)) # } } } compare.boot <- function(boot,alpha=.05) { #find the differences ncboot <- ncol(boot) npairs <- ncboot * (ncboot-1)/2 if(ncboot > 1) { num.cases <- nrow(boot) cnames <- colnames(boot) diff<- matrix(NA,ncol=npairs,nrow=nrow(boot)) cn <- rep(NA,npairs) k <- 1 for(i in 2:ncboot) { for (j in 1:(i-1)){ diff[,k] <- boot[,i]- boot[,j] k <- k +1 }} mean.diff <- colMeans(diff) sd.diff <- apply(diff,2,sd) ci.quant <- apply(diff,2, function(x) quantile(x,c(alpha/2,1-alpha/2),na.rm=TRUE)) # mean.boot <- matrix(mean.boot[1:(numx*numy)],nrow=numx) mean.diff <- matrix(mean.diff,nrow=npairs) # sd.boot <- matrix(sd.boot[1:(numx*numy)],nrow=numx) sd.diff <- matrix(sd.diff,nrow=npairs) k <- 1 for(i in 2:length(cnames)) { for (j in 1:(i-1)){cn[k] <- paste0(cnames[i]," x ",cnames[j]) k <- k + 1} } diff.df <- data.frame(mean.diff=mean.diff,sd.diff=sd.diff,low=ci.quant[1,], high=ci.quant[2,]) rownames(diff.df)<-cn # ci.ab <- matrix(ci.quant,nrow=2*numx) ci.ab <- ci.quant # colnames(mean.boot) <- colnames(sd.boot) <- c(y,m) # rownames(mean.diff) <- rownames(sd.diff) <- x #boots <- list(diffs=diff.df) return(diff.df) } else {} #nothing to do }psych/R/extension.diagram.r0000644000176200001440000001603714430177313015415 0ustar liggesusers"extension.diagram" <- function(fa.results,Phi=NULL,fe.results=NULL,sort=TRUE,labels=NULL,cut=.3,f.cut=.3,e.cut=.1,simple=TRUE,e.simple=FALSE,errors=FALSE,g=FALSE, digits=1,e.size=.05,rsize=.15, side=2,main,cex=NULL,e.cex=NULL,marg=c(.5,.5,1,.5),adj=1,ic=FALSE, ...) { pc <- FALSE old.par<- par(mar=marg) #give the window some narrower margins on.exit(par(old.par)) #set them back col <- c("black","red") # if(!is.matrix(fa.results) && !is.null(fa.results$fa) && is.list(fa.results$fa)) fa.results <- fa.results$fa if(is.null(cex)) cex <- 1 if(is.null(e.cex)) e.cex <- 1 #Phi <- NULL #the default case if(inherits(fa.results,"fa.reg")) { coefficients <- fa.results$regression$coefficients dv.cors <- fa.results$dv.cor fa.results <- fa.results$fa.extend regression <- TRUE if(missing(main)) {main <- "Factor analysis regression"}} else {regression <- FALSE if(missing(main)) {main <- "Factor analysis and extension"}} var.list <- arrow.list <- curve.list <- self.list<- list() if(sort) { if(!is.null(fa.results$fo)) {fe.results <- fa.sort(fa.results$fo)} else {fe.results <- fa.sort(fa.results)}} if((!is.matrix(fa.results)) && (!is.data.frame(fa.results))) {factors <- as.matrix(fe.results$loadings) if(!is.null(fa.results$Phi)) {Phi <- fa.results$Phi} else { if(!is.null(fa.results$cor)) {Phi<- fa.results$cor} }} else {factors <- fa.results} nvar <- dim(factors)[1] #how many variables? if (is.null(nvar) ){nvar <- length(factors) num.factors <- 1} else { num.factors <- dim(factors)[2]} #first some basic setup parameters nvar <- dim(factors)[1] #how many variables? e.size = e.size*16*cex/nvar if (is.null(nvar) ){nvar <- length(factors) num.factors <- 1} else { num.factors <- dim(factors)[2]} if (is.null(rownames(factors))) {rownames(factors) <- paste("V",1:nvar,sep="") } if (is.null(colnames(factors))) {colnames(factors) <- paste("F",1:num.factors,sep="") } var.rect <- list() fact.rect <- list() max.len <- max(nchar(rownames(factors)))*rsize x.max <- max((nvar+1),6) limx=c(-max.len/2,x.max) n.evar <- 0 if(!is.null(fe.results)) {n.evar <- dim(fe.results$loadings)[1] limy <- c(0,max(nvar+1,n.evar+1))} else { limy=c(0,nvar+1) } top <- max(nvar,n.evar) + 1 plot(0,type="n",xlim=limx,ylim=limy,frame.plot=FALSE,axes=FALSE,ylab="",xlab="",main=main,...) max.len <- max(strwidth(rownames(factors)),strwidth("abc"))/1.8 #slightly more accurate, but needs to be called after plot is opened limx=c(-max.len/2,x.max) cex <- min(cex,20/x.max) if(g) {left <- .3*x.max #where should the variable boxes go? It depends upon g middle <- .6*x.max gf <- 2 } else {left <- 0 middle <- .3*x.max gf <- 1} for (v in 1:nvar) { d.rect <- var.rect[[v]] <- dia.rect(left,top -v - max(0,n.evar-nvar)/2 ,labels=rownames(factors)[v],xlim=limx,ylim=limy,cex=cex,draw=FALSE,...) var.list <- c(var.list,rownames(factors)[v],d.rect) } f.scale <- (top)/(num.factors+1) f.shift <- max(nvar,n.evar)/num.factors if(g) {fact.rect[[1]] <- dia.ellipse(-max.len/2,top/2,colnames(factors)[1],xlim=limx,ylim=limy,e.size=e.size,cex=e.cex,...) for (v in 1:nvar) {if(simple && (abs(factors[v,1]) == max(abs(factors[v,])) ) && (abs(factors[v,1]) > cut) | (!simple && (abs(factors[v,1]) > cut))) { dia.arrow(from=fact.rect[[1]],to=var.rect[[v]]$left,labels =round(factors[v,1],digits),col=((sign(factors[v,1])<0) +1),lty=((sign(factors[v,1])<0)+1)) }}} for (f in gf:num.factors) { #body 34 if (pc) {fact.rect[[f]] <- dia.rect(left+middle,(num.factors+gf-f)*f.scale,colnames(factors)[f],xlim=limx,ylim=limy,cex=e.cex,...) } else {fact.rect[[f]] <- dia.ellipse(left+middle,(num.factors+gf -f)*f.scale,colnames(factors)[f],xlim=limx,ylim=limy,e.size=e.size,cex=e.cex,...)} for (v in 1:nvar) { if(simple && (abs(factors[v,f]) == max(abs(factors[v,])) ) && (abs(factors[v,f]) > cut) | (!simple && (abs(factors[v,f]) > cut))) { if(pc) {d.arrow<- dia.arrow(to=fact.rect[[f]],from=var.rect[[v]]$right,labels =round(factors[v,f],digits),col=((sign(factors[v,f])<0) +1),lty=((sign(factors[v,f])<0)+1),adj=f %% adj ,cex=cex, draw=FALSE) } else {d.arrow<- dia.arrow(from=fact.rect[[f]],to=var.rect[[v]]$right,labels =round(factors[v,f],digits),col=((sign(factors[v,f])<0) +1),lty=((sign(factors[v,f])<0)+1),adj=f %% adj +1,cex=cex, draw=FALSE) } arrow.list <- c(arrow.list,d.arrow) } } } if(!is.null(Phi) && (ncol(Phi) >1)) { for (i in 2:num.factors) { for (j in 1:(i-1)) { if(abs(Phi[i,j]) > f.cut) { # dia.curve(from=c(x.max-2+ e.size*nvar,(num.factors+1-i)*f.scale),to=c(x.max -2+ e.size*nvar,(num.factors+1-j)*f.scale),labels=round(Phi[i,j],digits),scale=(i-j),...)} dca <- dia.curved.arrow(from=fact.rect[[j]]$right,to=fact.rect[[i]]$right,labels=round(Phi[i,j],digits),scale=(i-j), draw=FALSE,cex=e.cex,...) curve.list <- c(curve.list,dca)} } } } if (errors) {for (v in 1:nvar) { dia.self(location=var.rect[[v]],scale=.5,side=side) } } if(!is.null(fe.results)) { if(regression) {e.loadings <- t(coefficients)} else { e.loadings <- fa.results$fe$loadings} n.evar <- NROW(e.loadings) cut <- e.cut #draw all extensions simple <- e.simple for (v in 1:n.evar) { # var.rect[[v]] <- dia.rect(x.max-middle,top - v* nvar/(n.evar+1),rownames(e.loadings)[v],xlim=limx,ylim=limy,cex=e.cex,...) var.rect[[v]] <- dia.rect(x.max-middle,top - (v)*(top)/(n.evar+1),rownames(e.loadings)[v],xlim=limx,ylim=limy,cex=e.cex,...) for(f in 1:num.factors) { if(simple && (abs(e.loadings[v,f]) == max(abs(e.loadings[v,])) ) && (abs(e.loadings[v,f]) > cut) | (!simple && (abs(e.loadings[v,f]) > cut))) { if(ic) {dia.arrow(to=fact.rect[[f]]$right,from=var.rect[[v]]$left,labels =round(e.loadings[v,f],digits),col=((sign(e.loadings[v,f])<0) +1),lty=((sign(e.loadings[v,f])<0)+1),adj=f %% adj +1,...)} else { dia.arrow(from=fact.rect[[f]],to=var.rect[[v]]$left,labels =round(e.loadings[v,f],digits),col=((sign(e.loadings[v,f])<0) +1),lty=((sign(e.loadings[v,f])<0)+1),adj=f %% adj +1,...)}} } } if(regression){ ny <- NCOL(dv.cors) scale.adj <- nvar/ny /4 #this makes the dependent correlations more obvious dv.cors <- round(dv.cors,digits) if(ny >1) {for (i in 2:ny) { for (k in 1:(i-1)) {if(abs(dv.cors[i,k]> e.cut)) { dca <- dia.curved.arrow(var.rect[[i]]$right,var.rect[[k]]$right,dv.cors[i,k],scale=(abs(i-k)*scale.adj),dir="u",cex=e.cex,draw=FALSE, ...)} else {dca<- NULL} curve.list <- c(curve.list,dca)} } }} multi.rect(var.list,...) multi.arrow(arrow.list,...) multi.curved.arrow(curve.list,...) } } #11/1/22 minor tweak in drawing the extension variables to center it properly psych/R/VSS.parallel.R0000644000176200001440000000046610625430047014201 0ustar liggesusers"VSS.parallel" <- function(ncases,nvariables,scree=FALSE,rotate="none") #function call { simdata=matrix(rnorm(ncases*nvariables),nrow=ncases,ncol=nvariables) #make up simulated data if(scree) {VSS.scree(simdata) testsim <- simdata} else {testsim <- VSS(simdata,8,rotate)} return(testsim) } psych/R/ICLUST.rgraph.R0000644000176200001440000000707512456461256014235 0ustar liggesusers#create Rgraphviz commands #create dot file for graphviz from ICLUST output #modified from ICLUST.graph to produce Rgraphviz output "ICLUST.rgraph" <- function(ic.results,out.file = NULL, min.size=1,short=FALSE,labels=NULL, size=c(8,6), node.font=c("Helvetica", 14), edge.font=c("Helvetica", 10), rank.direction=c("RL","TB","LR","BT"), digits=2,title="ICLUST",label.font=2, ...){ if(!requireNamespace('Rgraphviz')) {stop("I am sorry, you need to have the Rgraphviz package installed") #create several dummy functions to get around the "no visible global function definition" problem nodes <- function() {} addEdge <- function() {} subGraph <- function(){} } clusters <- as.matrix(ic.results$clusters) results <- ic.results$results rank.direction <- match.arg(rank.direction) #first some basic setup parameters #create the items as boxes #add the sign from the clusters num.var <- dim(clusters)[1] #how many variables? num.clust <- num.var - dim(clusters)[2] vars <- paste("V",1:num.var,sep="") clust <- paste("C",1:num.clust,sep="") clust.graph <- new("graphNEL",nodes=c(vars,clust),edgemode="directed") graph.shape <- c(rep("box",num.var),rep("ellipse",num.clust)) graph.rank <- c(rep("sink",num.var),rep("",num.clust)) names(graph.shape) <- nodes(clust.graph) names(graph.rank) <- nodes(clust.graph) edge.label <- rep("",num.clust*2) edge.name <- rep("",num.clust*2) names(edge.label) <- seq(1:num.clust*2) #show the cluster structure with ellipses for (i in 1:num.clust) {if(results[i,1]>0) { #avoid printing null results clust.graph <- addEdge(row.names(results)[i], results[i,1], clust.graph,1) edge.label[(i-1)*2+1] <- round(results[i,"r1"],digits) edge.name [(i-1)*2+1] <- paste(row.names(results)[i],"~", results[i,1],sep="") clust.graph <- addEdge(row.names(results)[i], results[i,2], clust.graph,1) edge.label[i*2] <- round(results[i,"r2"],digits) edge.name [i*2] <- paste(row.names(results)[i],"~", results[i,2],sep="") }} nAttrs <- list() #node attributes eAttrs <- list() #edge attributes if (!is.null(labels)) {var.labels <- c(labels,row.names(results)) #note how this combines variable labels with the cluster variables names(var.labels) <- nodes(clust.graph) nAttrs$label <- var.labels names(edge.label) <- edge.name node.font.size <- as.numeric(node.font[2]) n.font.size <- c(rep(label.font*node.font.size,length(labels)),rep(node.font.size,(length(var.labels)-length(labels)))) names(n.font.size) <- nodes(clust.graph) nAttrs$fontsize <- n.font.size } names(edge.label) <- edge.name e.font.size <- rep(6,num.clust*2) names(e.font.size) <- edge.name nAttrs$shape <- graph.shape eAttrs$fontsize <- e.font.size nAttrs$rank <- graph.rank eAttrs$label <- edge.label attrs <- list(node = list(shape = "ellipse", fixedsize = FALSE),graph=list(rankdir=rank.direction, fontsize=8,bgcolor="white" )) obs.var <- subGraph(vars,clust.graph) cluster.vars <- subGraph(clust,clust.graph) observed <- list(list(graph=obs.var,cluster=TRUE,attrs=c(rank="sink"))) plot(clust.graph, nodeAttrs = nAttrs, edgeAttrs = eAttrs, attrs = attrs,subGList=observed,main=title) if (!is.null(out.file)) { toDotty(clust.graph,out.file,nodeAttrs = nAttrs, edgeAttrs = eAttrs, attrs = attrs,subGList=observed) } } #test with Harman data sets #ic1 <- ICLUST(Harman74.cor$cov) #ic4 <- ICLUST(Harman74.cor$cov,nclusters=4) #ic8 <- ICLUST(Harman23.cor$cov) #ICLUST.rgraph(ic1) #ICLUST.rgraph(ic4) #ICLUST.rgraph(ic8,labels=colnames(Harman23.cor$cov)) psych/R/parse.R0000644000176200001440000000315713401070302013032 0ustar liggesusers #This parses a formula like input and return the left hand side variables (y) and right hand side (x) as well as products (prod) and partials (-) # fparse <- function(expr){ m <- prod <- ex <- ex2 <- NULL all.v <- all.vars(expr) te <- terms(expr) #this will expand the expr for products fac <- attributes(te)$factors x <- rownames(fac)[-1] #drop the y variables # y <- all.v[!all.v %in% x] z <- rownames(fac)[rowSums(fac) < 1] #what does this do? if(length(z) > 1) {z <- z[-1] x <- x [! x%in%z]} else {z <- NULL} char.exp <- as.character(expr[3]) #strip out exponential terms from x notx <- regmatches(char.exp, gregexpr("I\\(.*?\\)", char.exp))[[1]] x <- x[!x %in%notx] ex1 <- gsub("I[\\(\\)]", "", regmatches(char.exp, gregexpr("I\\(.*?\\)", char.exp))[[1]]) #look for I(x) if (length(ex1) >0) {ex <- sub("\\)","",ex1) } x <- x[ ! x %in% ex] #now look for mediators m <- gsub("[\\(\\)]", "", regmatches(char.exp, gregexpr("\\(.*?\\)", char.exp))[[1]]) if(length(m)<1) {m <- NULL} else {m <- m[! m %in% ex] } if(length(m) < 1) m <- NULL prod.terms <- sum(attributes(te)$order > 1) if(prod.terms > 0 ) { n1 <- sum(attributes(te)$order == 1) prod <- list() for(i in(1:prod.terms)) { prod[[i]] <- names(which(fac[,n1+i] > 0)) } } #now, if there are ex values, get rid of the ^2 if(!is.null(ex)) {ex <- sub("\\^2","",ex) } y <- all.v[ ! all.v %in% c(x,z,ex) ] return(list(y=y,x=x,m=m,prod=prod,z = z,ex=ex)) } psych/R/fa.ci.R0000644000176200001440000000256514444434173012724 0ustar liggesusers#revised 11/5/14 to handle sorted fa data with bootstrapped confidence intervals "print_psych.fa.ci" <- function(x,digits=2,all=FALSE,...) { cat("Factor Analysis with confidence intervals using method = ",x$f$fm ) # cat("\nCall: ") print(x$Call) class(x) <- c("psych","fa") print(x) nfactors <-dim(x$loadings)[2] c("\n Confidence intervals\n") if(is.null(x[["ci"]])) { lc <- lci <- data.frame(unclass(x$loadings),x$cis$ci)} else { lc <- lci <- data.frame(unclass(x$loadings),x$ci)} for(i in 1:nfactors) { lci[,(i-1)*3 +2 ] <- lc[,i] lci[,(i-1)*3 +1 ] <- lc[,i+nfactors] lci[,(i-1)*3 +3 ] <- lc[,i+nfactors*2] } colnames(lci) <- paste(rep(c("low","coeff","upper"),nfactors),sep="") for(i in 1:nfactors) { colnames(lci)[(i-1)*3+2] <- colnames(x$loadings)[i] } cat("\n Coefficients and bootstrapped confidence intervals \n") print (round(lci,digits=digits)) if(!is.null(x$Phi)) { phis <- x$Phi[lower.tri(x$Phi)] cci <- data.frame(lower=x$cis$ci.rot$lower,estimate = phis,upper= x$cis$ci.rot$upper) cnR <- abbreviate(colnames(x$loadings),minlength=5) k <- 1 for(i in 1:(nfactors-1)) {for (j in (i+1):nfactors) { rownames(cci)[k] <- paste(cnR[i],cnR[j],sep="-") k<- k +1 }} #added 10/4/14 cat("\n Interfactor correlations and bootstrapped confidence intervals \n") print(cci,digits=digits)} } psych/R/diagram.R0000644000176200001440000004415114526765160013351 0ustar liggesusers#The diagram functions #The diagram function is the generic function (actually empty) that allows a clearer help file #the entire set of functions for diagramming are all prefixed as dia. "diagram" <- function(fit,...) { # figure out which psych function was called and then call the appropriate diagram value <- NULL omega <- bassAck <- extend <- extension <- lavaan <- fa.reg <- NULL #weird requirement to avoid being global names <- cs(lm, lavaan,fa,principal,iclust,omega,bassAck, extension, extend, fa.reg, esem,mediate, lmCor, setCor, fa.multi,lm) if(length(class(fit)) == 1) { value <- inherits(fit,names,which=TRUE) #if (inherits(fit,"lm")) value <- "lm" } if(any(value > 0) ) { value <- names[which(value > 0)]} else {value <- "other"} } if(length(class(fit)) > 1) { names <- cs(lm, lavaan,fa,principal,iclust,omega,bassAck, extension, extend, fa.reg, esem,mediate, lmCor, setCor, fa.multi,lm) value <- inherits(fit,names,which=TRUE) # value <- class(x)[2] if(any(value > 0) ) { value <- names[which(value > 0)]} else {value <- "other"} } switch(value, #replaced a series of if statements to switch 12/23/18 fa = {fa.diagram(fit,...) }, principal = {fa.diagram(fit,...) }, iclust = {iclust.diagram(fit,...)}, omega = {omega.diagram(fit,...)}, lavaan = {lavaan.diagram(fit,...)}, bassAck = {bassAckward.diagram(fit,...)} , extend = {extension.diagram(fit ,...)}, extension ={extension.diagram(fit,...)}, fa.reg ={extension.diagram(fit,...)}, esem = {esem.diagram(fit,...)}, setCor = {lmDiagram(fit,...)}, lmCor = {lmDiagram(fit,...)}, lm = {lmDiagram(fit, ...)}, mediate ={mediate.diagram(fit,...)}, fa.multi ={fa.multi.diagram(fit,...)} ) } # 6/6/20 added the draw option to allow for speeding up higher level calls # added the pmax to handle multiple calls #modified April 19, 2012 to handle long names more gracefully "dia.rect" <- function(x, y = NULL, labels =NULL, cex = 1, xlim=c(0,1),ylim=c(0,1),draw=TRUE,...) { if(draw) text(x=x, y = y, labels = labels, cex = cex, ...) xrange = (xlim[2] - xlim[1]) yrange = (ylim[2] - ylim[1]) xs <- .10 * xrange ys <- .10 * yrange #len <- max(nchar(labes)*cex*.2/2,cex*.25)*xs len <- pmax(strwidth(labels,units="user",cex=cex,...),strwidth("abc",units="user",cex=cex,...)) /1.8 #was 1.8 vert <- pmax(strheight(labels,units="user",cex=cex,...),strheight("ABC",units="user",cex=cex,...))/1. # vert <- min(cex*.3 * ys,.3) if(draw) rect(x-len,y-vert,x+len,y+vert) left <- c(x-len,y) right <- c(x+len,y) top <- c(x,y+vert) bottom <- c(x,y-vert) radius <- sqrt(len^2+vert^2) dia.rect <- list(left=left,right=right,top=top,bottom=bottom,center=c(x,y),radius=radius) } "dia.rect1" <- function(x,y,xleft,ybot,xright,ytop, labels =NULL, cex = 1, xlim=c(0,1),ylim=c(0,1),draw=TRUE,...) { text(x,y,labels,cex=cex,...) rect(xleft,ybot,xright,ytop) } "dia.ellipse" <- function(x, y = NULL, labels = NULL, cex = 1,e.size=.05,xlim=c(0,1),ylim=c(0,1),draw=TRUE, ...) { if(draw) text(x=x, y = y, labels = labels,cex = cex, ...) len <- max(strwidth(labels),strwidth("abc"))/1.6 ht <- max(strheight(labels),strheight("abc"))/1.0 #vert <- cex*.3 xs <- dia.ellipse1(x,y,xlim=xlim,ylim=ylim,e.size=e.size*len,draw=draw,...) # ys <- xs[2] # xs <- xs[1] ys <- xs* ht/len left <- c(x-xs,y) #xs is a little too big right <- c(x+xs,y) top <- c(x,y+ys) bottom <- c(x,y-ys) center <- c(x,y) dia.ellipse <- list(left=left,right=right,top=top,bottom=bottom,center=center,radius=xs) } "dia.ellipse1" <- function (x,y,e.size=.05,xlim=c(0,1),ylim=c(0,1),draw=TRUE,...) { #code adapted from John Fox segments=51 angles <- (0:segments) * 2 * pi/segments angles <- c(angles,NA) #this allows us to draw multiple ellipses unit.circle <- cbind(cos(angles), sin(angles)) xrange = (xlim[2] - xlim[1]) yrange = (ylim[2] - ylim[1]) xs <- e.size * xrange ys <- e.size * yrange ellipse <- unit.circle ellipse[,1] <- ellipse[,1]*xs + x ellipse[,2] <- ellipse[,2]*ys + y #ys? if(draw) lines(ellipse, ...) # return(list(xs=xs,ys=ys)) return(xs) } "dia.ellipse.fill" <- function (x,y,e.size=.05,xlim=c(0,1),ylim=c(0,1),draw=TRUE,density=NULL,angle=NULL,...) { #code adapted from John Fox segments=51 angles <- (0:segments) * 2 * pi/segments angles <- c(angles,NA) #this allows us to draw multiple ellipses unit.circle <- cbind(cos(angles), sin(angles)) xrange = (xlim[2] - xlim[1]) yrange = (ylim[2] - ylim[1]) xs <- e.size * xrange ys <- e.size * yrange ellipse <- unit.circle ellipse[,1] <- ellipse[,1]*xs + x ellipse[,2] <- ellipse[,2]*ys + y #ys? if(draw) lines(ellipse, ...) polygon(ellipse,density=density,angle=angle,...) # return(list(xs=xs,ys=ys)) return(xs) } #added to allow for fast drawing of multiple ellipses in fa.diagram and bassAckward diagram dia.multi.ellipse <- function(x, y = NULL, labels = NULL, cex = 1,e.size=.4,xlim=c(0,1),ylim=c(0,1),draw=TRUE, ...) { segments=51 text(x=x, y = y, labels = labels,cex = cex, ...) angles <- (0:segments) * 2 * pi/segments angles <- c(angles,NA) #this allows us to draw multiple ellipses unit.circle <- cbind(cos(angles), sin(angles)) xrange = (xlim[2] - xlim[1]) yrange = (ylim[2] - ylim[1]) ys <- max(strheight(labels,units="user")) #*.55 # xs <- e.size * xrange #ys <- e.size * yrange ellipse <- unit.circle xs <- strwidth(labels,units="user")*.6 #this makes the ellipse closer to left and right #xs <- xs * e.size xs <- xs * cex ys <- ys * cex ellipsex <- rep(x,each=(segments + 2)) + unit.circle[,1] * rep(xs,each=segments +2) ellipsey <- rep(y,each=(segments + 2)) + unit.circle[,2] *ys lines(ellipsex,ellipsey) } "dia.triangle" <- function(x, y = NULL, labels =NULL, cex = 1, xlim=c(0,1),ylim=c(0,1),...) { text(x=x, y = y, labels = labels, cex = cex, ...) STRETCH=.25 xrange = (xlim[2] - xlim[1]) yrange = (ylim[2] - ylim[1]) xs <- .10 * xrange ys <- .10 * xrange #len <- max(nchar(labels)*cex*.2/2,cex*.25)*xs len <- max(strwidth(labels)*.7,strwidth("abc")) #vert <- min(cex*.3 * ys,.3) vert <- .7*len left <- c(x-len/2,y+vert/2-STRETCH/2) right <- c(x+len/2,y+vert/2-STRETCH/2) top <- c(x,y+vert) # bottom <- c(x,y-vert/2) bottom <- c(x,y-vert*STRETCH) xs <- c(x-len,x+len,x) ys <- c(y-vert*STRETCH,y-vert*STRETCH,y+vert) #ys <- c(y,y,y+vert) polygon(xs,ys) radius <- sqrt(len^2+vert^2) dia.rect <- list(left=left,right=right,top=top,bottom=bottom,center=c(x,y),radius=radius) } "dia.curve" <- function(from,to,labels=NULL,scale=1,...) { #arrow at beginning and end of curve #first, figure out the boundaries of the curve radius1 <- radius2 <- 0 if(is.list(from)) {if(!is.null(from$radius)) {radius1 <- from$radius radius2 <- 0 from <- from$center}} if(is.list(to)) {if(!is.null(to$radius)) {radius2<- to$radius to <- to$center}} theta <- atan((from[2] - to[2])/(from[1] - to[1])) if((from[1] < to[1])) { radius1 <- -radius1 radius2 <- -radius2} from[1] <- from[1] - cos(theta) * radius1 from[2] <- from[2] - sin(theta) * radius1 to[1] <- to[1] + cos(theta) * radius2 to[2] <- to[2] + sin(theta) * radius2 n <- 40 scale <- .8 * scale if(is.null(labels)) {shift<- 0} else {shift <- 4} if(abs(from[1] - to[1]) < abs(from[2] - to[2])) { #is it primarily up and down or left to right? x <- c(from[1],(from[1]+to[1])/2+scale,to[1]) y <- c(from[2],(from[2]+to[2])/2,to[2]) sp <- spline(y,x,n) lines(sp$y[1:(n/2-shift)],sp$x[1:(n/2-shift)]) lines(sp$y[(n/2+shift):n],sp$x[(n/2+shift):n]) arrows(sp$y[3],sp$x[3],sp$y[1],sp$x[1],length = .5*abs(sp$x[2]-sp$x[1])) arrows(sp$y[n-3],sp$x[n-3],sp$y[n],sp$x[n],length = .5*abs(sp$x[2]-sp$x[1])) text(sp$y[n/2],sp$x[n/2],labels,...) } else { x <- c(from[1],(from[1]+to[1])/2,to[1]) y <- c(from[2],(from[2]+to[2])/2+ scale,to[2]) sp <- spline(x,y,n) lines(sp$x[1:(n/2-shift)],sp$y[1:(n/2-shift)]) lines(sp$x[(n/2+shift):n],sp$y[(n/2+shift):n]) arrows(sp$x[3],sp$y[3],sp$x[1],sp$y[1],length = 1*abs(sp$y[2]-sp$y[1])) arrows(sp$x[n-3],sp$y[n-3],sp$x[n],sp$y[n],length = 1*abs(sp$y[2]-sp$y[1])) text(sp$x[n/2],sp$y[n/2],labels,...) } } "dia.curved.arrow" <- function(from,to,labels=NULL,scale=1,both=FALSE,dir=NULL,draw=TRUE,...) { #note that the splines seem to go from left to right no matter what! #first determine whether to add or subtract the radius result <- NULL radius1 <- radius2 <- 0 if(is.list(from)) {if(!is.null(from$radius)) {radius1 <- from$radius radius2 <- 0 from <- from$center}} if(is.list(to)) {if(!is.null(to$radius)) {radius2<- to$radius to <- to$center}} theta <- atan((from[2] - to[2])/(from[1] - to[1])) if( from[1] < to[1] ) { radius1 <- -radius1 radius2 <- -radius2} from[1] <- from[1] - cos(theta) * radius1 from[2] <- from[2] - sin(theta) * radius1 to[1] <- to[1] + cos(theta) * radius2 to[2] <- to[2] + sin(theta) * radius2 scale <- .8 * scale n <- 40 if(is.null(labels)) {shift<- 0} else {shift <- 4} #3 cases: dir="u", dir = "l", dir = NULL if(is.null(dir)) { if((abs(from[1] - to[1]) < abs(from[2] - to[2]))) {dir <- "u"} else {dir <- "l"}} switch(dir, # if(is.null(dir) & (abs(from[1] - to[1]) < abs(from[2] - to[2]))) u = { #is it primarily up and down or left to right? x <- c(from[1],(from[1]+to[1])/2+scale,to[1]) y <- c(from[2],(from[2]+to[2])/2,to[2]) sp <- spline(y,x,n) if(draw) { lines(sp$y[1:(n/2-shift)],sp$x[1:(n/2-shift)]) lines(sp$y[(n/2+shift):n],sp$x[(n/2+shift):n]) arrows(sp$y[3],sp$x[3],sp$y[1],sp$x[1],length = .5*abs(sp$x[2]-sp$x[1])) if(both) arrows(sp$y[n-3],sp$x[n-3],sp$y[n],sp$x[n],length = .5*abs(sp$x[2]-sp$x[1])) text(sp$y[n/2],sp$x[n/2],labels,...) } else { result <- list(firstx=c(sp$y[1:(n/2-shift)],NA), firsty=c(sp$x[1:(n/2-shift)],NA), secondx = c(sp$y[(n/2+shift):n],NA), secondy=c(sp$x[(n/2+shift):n],NA), firstarrow = c(sp$y[3],sp$x[3],sp$y[1],sp$x[1],length = .5*abs(sp$x[2]-sp$x[1]) ), text = c(sp$y[n/2],sp$x[n/2],labels) )} }, l = { x <- c(from[1],(from[1]+to[1])/2,to[1]) y <- c(from[2],(from[2]+to[2])/2+ scale,to[2]) sp <- spline(x,y,n) if(draw) {lines(sp$x[1:(n/2-shift)],sp$y[1:(n/2-shift)]) lines(sp$x[(n/2+shift):n],sp$y[(n/2+shift):n]) if(both) { arrows(sp$x[3],sp$y[3],sp$x[1],sp$y[1],length = 1*abs(sp$y[2]-sp$y[1])) arrows(sp$x[n-3],sp$y[n-3],sp$x[n],sp$y[n],length = 1*abs(sp$y[2]-sp$y[1])) } else {if((from[1] > to[1] ) ) {arrows(sp$x[3],sp$y[3],sp$x[1],sp$y[1],length = 1*abs(sp$y[2]-sp$y[1])) } else { arrows(sp$x[n-3],sp$y[n-3],sp$x[n],sp$y[n],length = 1*abs(sp$y[2]-sp$y[1]))} text(sp$x[n/2],sp$y[n/2],labels,...) } } else {result <- list(firstx=c(sp$x[1:(n/2-shift)],NA), firsty=c(sp$y[1:(n/2-shift)],NA), secondx = c(sp$x[(n/2+shift):n],NA), secondy=c(sp$y[(n/2+shift):n],NA), firstarrow = c(sp$x[3],sp$y[3],sp$x[1],sp$y[1],length = .5*abs(sp$y[2]-sp$x[1]) ), text = c(sp$x[n/2],sp$y[n/2],labels) ) } }) return(result) } "dia.self" <- function(location,labels=NULL,scale=.8,side=2,draw=TRUE,...) { n <- 20 if(side %%2 > 0) {scale <- scale*(location$right[1] - location$left[1]) } else {scale <- scale*(location$top[2] - location$bottom[2]) } # scale <- scale *.8 result <- NULL if(side ==1) {x <- c(location$bottom[1]-scale/2,location$bottom[1],location$bottom[1]+scale/2) y <- c(location$bottom[2],location$bottom[2]-scale,location$bottom[2]) sp <- spline(x,y,n=20) if(draw) {lines(sp$x,sp$y) arrows(sp$x[3],sp$y[3],sp$x[1],sp$y[1],length = 2*abs(sp$x[3]-sp$x[1])) arrows(sp$x[n-3],sp$y[n-3],sp$x[n],sp$y[n],length = 2*abs(sp$x[3]-sp$x[1])) text(sp$x[n/2],sp$y[n/2]-scale,labels,...)} result <- list(sp$x[n/2],sp$y[n/2]-scale,labels, c(sp$x,NA),c(sp$y,NA),sp$x[3],sp$y[3],sp$x[1],sp$y[1],sp$x[n-3],sp$y[n-3],sp$x[n],sp$y[n], 2*abs(sp$x[3]-sp$x[1])) } if(side == 2) {x <- c(location$left[1],location$left[1]-scale,location$left[1]) y <- c(location$left[2]-scale/2,location$left[2],location$left[2]+scale/2) sp <- spline(y,x,n=20) if(draw) {lines(sp$y,sp$x) arrows(sp$y[3],sp$x[3],sp$y[1],sp$x[1],length = 2*abs(sp$x[2]-sp$x[1])) arrows(sp$y[n-3],sp$x[n-3],sp$y[n],sp$x[n],length = 2*abs(sp$x[2]-sp$x[1])) text(sp$y[n/2]-scale,sp$x[n/2],labels,...) } result <- list(sp$x[n/2],sp$y[n/2]-scale,labels, c(sp$x,NA),c(sp$y,NA),sp$y[3],sp$x[3],sp$y[1],sp$x[1],sp$y[n-3],sp$x[n-3],sp$y[n],sp$x[n],2*abs(sp$x[2]-sp$x[1])) } if(side == 3) {x <- c(location$top[1]-scale/2,location$top[1],location$top[1]+scale/2) y <- c(location$top[2],location$top[2]+scale,location$top[2]) sp <- spline(x,y,n=20) if(draw) {lines(sp$x,sp$y) arrows(sp$x[3],sp$y[3],sp$x[1],sp$y[1],length = 2*abs(sp$x[3]-sp$x[1])) arrows(sp$x[n-3],sp$y[n-3],sp$x[n],sp$y[n],length = 2*abs(sp$x[3]-sp$x[1])) text(sp$x[n/2],sp$y[n/2]+scale,labels,...)} result <- list(sp$x[n/2],sp$y[n/2]-scale,labels, c(sp$x,NA),c(sp$y,NA),sp$x[3],sp$y[3],sp$x[1],sp$y[1],sp$x[n-3],sp$y[n-3],sp$x[n],sp$y[n],2*abs(sp$x[3]-sp$x[1])) } if(side == 4) {x <- c(location$right[1],location$right[1]+scale,location$right[1]) y <- c(location$right[2]-scale/2,location$right[2],location$right[2]+scale/2) sp <- spline(y,x,n=20) if(draw) {lines(sp$y,sp$x) arrows(sp$y[3],sp$x[3],sp$y[1],sp$x[1],length =2* abs(sp$x[3]-sp$x[1])) arrows(sp$y[n-3],sp$x[n-3],sp$y[n],sp$x[n],length = 2*abs(sp$x[3]-sp$x[1])) text(sp$y[n/2]+scale,sp$x[n/2],labels,...)} result <- list(sp$x[n/2],sp$y[n/2]-scale,labels, c(sp$x,NA),c(sp$y,NA),sp$y[3],sp$x[3],sp$y[1],sp$x[1],sp$y[n-3],sp$x[n-3],sp$y[n],sp$x[n],2*abs(sp$x[3]-sp$x[1])) } return(result) } "dia.shape" <- function(x, y = NULL, labels = NULL, cex = 1,e.size=.05,xlim=c(0,1),ylim=c(0,1),shape=1, ...) { if (shape==1) {dia.rect(x, y = NULL, labels =NULL, cex = 1, xlim=c(0,1),ylim=c(0,1),...) } if (shape==2) {dia.ellipse(x, y = NULL, labels = NULL, cex = 1,e.size=.05,xlim=c(0,1),ylim=c(0,1), ...)} if (shape==3) {dia.triangle(x, y = NULL, labels =NULL, cex = 1, xlim=c(0,1),ylim=c(0,1),...)} } "multi.curved.arrow" <- function(curved.list,l.cex=NULL,...) { leng <- length(curved.list) if(leng > 0) { #nvar <- leng/6 seq <- seq.int(1,leng,6) firstX <- unlist(curved.list[seq]) firstY <- unlist(curved.list[seq +1]) secondX <- unlist(curved.list[seq +2]) secondY <- unlist(curved.list[seq+3]) textvalues <- matrix(unlist(curved.list[seq+5]),ncol=3,byrow=TRUE) lines(firstX,firstY) lines(secondX,secondY) if(is.null(l.cex) ) l.cex=1 text(x= textvalues[,1],y = textvalues[,2],labels=textvalues[,3],cex=l.cex,...) }} "multi.self" <- function(self.list,...) { leng <- length(self.list) seq <- seq.int(1,leng,14) x <- unlist(self.list[seq+3]) y <- unlist(self.list[seq+4]) lines(x,y) arrow.df <- data.frame(x0 = unlist(self.list[seq+5]),y0 = unlist(self.list[seq+6]),x1=unlist(self.list[seq+7]),y1 = unlist(self.list[seq+8]), x00 = unlist(self.list[seq+9]),y00 = unlist(self.list[seq+10]),x11=unlist(self.list[seq+11]),y11 = unlist(self.list[seq+12])) arrow.length <- unlist(self.list[seq+13]) arrows(arrow.df$x0,arrow.df$y0,arrow.df$x1,arrow.df$y1,arrow.length[1]) arrows(arrow.df$x00,arrow.df$y00,arrow.df$x11,arrow.df$y11,arrow.length[1]) } "multi.arrow" <- function(arrows.list,...){ tv <- matrix(unlist(arrows.list),byrow=TRUE,ncol=21) #cname<- colnames(tv) tv <- data.frame(tv,stringsAsFactors=FALSE) tv[,c(1:2,4:19)] <- nchar2numeric(tv[,c(1:2,4:19)]) if(nchar(as.vector(tv[1,21])) ==1) tv[,21] <- as.numeric(tv[,21 ]) #sometimes this is "solid", sometimes "1" #the as.vector gets around a problem in R 3.6 #colnames(tv) <- cname textlocation <- tv[,1:5] #colnames(textlocation) <- c("x","y","labels","pos","cex") textlocation1 <- tv[tv[,4]>0,1:5] textlocation0 <- tv[tv[,4]==0,1:5] if(NROW(textlocation1)>0 ) text(textlocation1[1:2],labels=textlocation1[,3],pos=textlocation1[,4],cex=textlocation1[,5]) text(textlocation0[1:2],labels=textlocation0[,3],cex=textlocation0[,5]) # text(tv[,1],tv[,2],tv[,3],pos=tv[,4],cex=tv[,5]) len1 <- as.numeric(tv[1,10]) len2 <- as.numeric(tv[1,17]) arrows(x0=tv[,6],y0=tv[,7],x1=tv[,8],y1=tv[,9],length=len1,angle=30,code=1,col=tv[,20],lty=tv[,21],...) arrows(x0=tv[,13],y0=tv[,14],x1=tv[,15],y1=tv[,16],length=len2,angle=30,code=2,col=tv[,20],lty=tv[,21],...) } multi.rect <- function (rect.list, ...) { nvar <- length(rect.list)/7 tv <- matrix(unlist(rect.list), nrow = nvar, byrow = TRUE) all.rects.x <- as.numeric(tv[, 6]) all.rects.y <- as.numeric(tv[, 3]) all.rects.names <- tv[, 1] dia.rect(all.rects.x, all.rects.y, all.rects.names) } # "multi.rect" <- function(rect.list,...) { # nvar <- length(rect.list) # tv <- matrix(unlist(rect.list),nrow=nvar,byrow=TRUE) # all.rects.x <- as.numeric(tv[,5]) #the center of the figure # all.rects.y <- as.numeric(tv[,2]) # all.rects.names <- tv[,1] # i dont have names # dia.rect(all.rects.x, all.rects.y,all.rects.names) # } psych/R/fa.graph.R0000644000176200001440000000633511744537324013433 0ustar liggesusers#take the output from a factor analysis and create the dot code #Created April 20, 2012 to allow making dot code without using rgraphviz "fa.graph" <- function(fa.results,out.file=NULL,labels=NULL,cut=.3,simple=TRUE, size=c(8,6), node.font=c("Helvetica", 14), edge.font=c("Helvetica", 10), rank.direction=c("RL","TB","LR","BT"), digits=1,main="Factor Analysis", ...){ Phi <- NULL #the default case if(!missing(out.file)){ out <- file(out.file, "w") #set it to write permission on.exit(close(out)) } else out <- stdout() #use the normal console for output if((!is.matrix(fa.results)) && (!is.data.frame(fa.results))) {factors <- as.matrix(fa.results$loadings) if(!is.null(fa.results$Phi)) Phi <- fa.results$Phi} else {factors <- as.matrix(fa.results)} rank.direction <- match.arg(rank.direction) #first some basic setup parameters if (length(labels)==0) { labels <- rownames(fa.results$loadings)} else {labels=labels} num.var <- dim(factors)[1] #how many variables? if (is.null(num.var) ){num.var <- length(factors) num.factors <- 1} else { num.factors <- dim(factors)[2]} vars <- paste("V",1:num.var,sep="") fact <- paste("F",1:num.factors,sep="") #first some basic setup parameters cat( file=out,paste('digraph Factor ', ' {\n', sep="")) cat(file=out, paste(' rankdir=', rank.direction, ';\n', sep="")) cat(file=out, paste(' size="',size[1],',',size[2],'";\n', sep="")) cat(file=out, paste(' node [fontname="', node.font[1], '" fontsize=', node.font[2], ' shape=box, width=2];\n', sep="")) cat(file=out, paste(' edge [fontname="', edge.font[1], '" fontsize=', edge.font[2], '];\n', sep="")) #cat(file=out, paste(' label = "' ,main,'";fontsize=20;\n', sep="")) if(simple) { # do simple structure if requested rowmax <- apply(abs(factors),1,max) factors[abs(factors) < rowmax] <- 0 } #create the items as boxes for (i in 1:num.var) { cat(file=out,paste('V',i,' [label = "',labels[i], '"];\n', sep="")) } #show the factors as ellipses cat(file=out,paste('node [shape=ellipse, width ="1"];\n', sep="")) #draw the loadings for( nf in 1:num.factors) { for (i in 1:num.var) {if(abs(factors[i,nf]) > cut ) { #avoid printing null results cat(file=out,paste(colnames(factors)[nf], '-> V', i, ' [ label = ',round(factors[i,nf],digits),' ];\n', sep="")) } } } #draw the interfactor correlations if(!is.null(Phi)) { for (f in 2:num.factors) { for (f1 in 1:(f-1)) { if(abs(Phi[f,f1]) > cut) { cat(file=out,paste(colnames(factors)[f], ' -> ', colnames(factors)[f1], ' [ label = ',round(Phi[f,f1],digits),' , dir="both" ];\n', sep="")) } } } } #keep the boxes all at the same rank (presumably the left side) cat(file=out, paste('{ rank=same;\n', sep="")) for (i in 1:num.var) { cat(file=out,paste('V',i,';', sep="")) } cat(file=out, paste('}', sep="")) cat(file=out, paste('{ rank=same;\n', sep="")) for (nf in 1:num.factors) { cat(file=out,paste(paste(colnames(factors)[nf],';', sep=""))) } cat(file=out, paste('}}', sep="")) # we are finished } psych/R/BASS.R0000644000176200001440000000327311061633617012465 0ustar liggesusersBASS<-function(R, maxP=5, Print = "ON"){ #--------------------------------------------------------- # Program to compute Goldberg's Bass Ackwards Procedure # from a correlation matrix (R). PC with Varimax Rotation # Niels Waller, May 10, 2006 # # Program arguments: # R = input correlation matrix # maxP = maximum number of components to rotate # Print = ON/OFF to print summarzed findings to screen #---------------------------------------------------------- varNames <- rownames(R, do.NULL = FALSE, prefix = "var") ULU <- eigen(R) U <- ULU$vectors L <- ULU$values key <- sign(apply(U, 2, sum)) key[key < 0] <- -1 U <- U %*% diag(key) P <- U %*% diag(sqrt(L)) p <- ncol(R) CrossLevelCors <- list(rep(0, p - 1)) T <- list(rep(0, p - 1)) PCloadings <- list(rep(0, p - 1)) for (i in 2:maxP) { vout <- varimax(P[, 1:i], normalize = TRUE, eps = 1e-15) T[[i - 1]] <- vout$rotmat PCloadings[[i - 1]] <- vout$loadings[1:p, ] rownames(PCloadings[[i - 1]]) <- varNames } Z <- paste("Z", 1:maxP, sep = "") V <- paste("V", 1:maxP, sep = "") if (Print == "ON") { cat("nCorrelation of", Z[1], " with ", V[2], "n") } out <- T[[1]][1, ] dim(out) <- c(1, 2) rownames(out) <- Z[1] colnames(out) <- paste(V[2], ".", 1:2, sep = "") CrossLevelCors[[1]] <- out if (Print == "ON") { print(round(out, 3)) } for (i in 2:(maxP - 1)) { if (Print == "ON") { cat("nnnCorrelation of", V[i], " with ", V[i + 1], "nn") } S <- cbind(diag(i), matrix(0, i, 1)) out <- t(T[[i - 1]]) %*% S %*% T[[i]] rownames(out) <- paste(V[i], ".", 1:i, sep = "") colnames(out) <- paste(V[i + 1], ".", 1:(i + 1), sep = "") CrossLevelCors[[i]] <- out if (Print == "ON") { print(round(out, 3)) } } invisible(list(T = T, cors = CrossLevelCors, loadings = PCloadings)) }psych/R/print.psych.schmid.R0000644000176200001440000001420314444406711015460 0ustar liggesusers"print_psych.schmid" <- function(x,digits=2,all=FALSE,cut=NULL,sort=FALSE,...) { if(is.null(cut)) cut <- .2 cat( "Schmid-Leiman analysis \n") cat("Call: ") print(x$Call) cat("\nSchmid Leiman Factor loadings greater than ",cut, "\n") loads <- x$sl nfactor <- ncol(loads)-3 if(sort) { ord <- sort(abs(loads[,1]),decreasing=TRUE,index.return=TRUE) loads[,] <- loads[ord$ix,] rownames(loads) <- rownames(loads)[ord$ix] loads <- cbind(v=ord$ix,loads) } #end sort tn <- colnames(loads) loads <- data.frame(loads) colnames(loads) <- tn #this seems weird, but otherwise we lose the F* name if(sort) {loads[,1] <- as.integer(loads[,1]) load.2 <- loads[,2:(nfactor+1)]} else {load.2 <- loads[,1:nfactor] } h2 <- round(loads[,"h2"],digits) u2 <- round(loads[,"u2"],digits) loads <- round(loads,digits) fx <- format(loads,digits=digits) nc <- nchar(fx[1,3], type = "c") fx[abs(loads)< cut] <- paste(rep(" ", nc), collapse = "") # h2 <- round(rowSums(load.2^2),digits) #u2 <- 1 - h2 p2 <- loads[,"p2"] mp2 <- mean(p2) vp2 <- var(p2) p2 <- round(p2,digits) print(cbind(fx[,1:(nfactor+sort)],h2,u2,p2),quote="FALSE") numfactors <- dim(x$sl)[2] -3 eigenvalues <- diag(t(x$sl[,1:numfactors]) %*% x$sl[,1:numfactors]) cat("\nWith eigenvalues of:\n") ev.rnd <- round(eigenvalues,digits) print(ev.rnd,digits=digits) maxmin <- max(eigenvalues[2:numfactors])/min(eigenvalues[2:numfactors]) gmax <- eigenvalues[1]/max(eigenvalues[2:numfactors]) cat("\ngeneral/max " ,round(gmax,digits)," max/min = ",round(maxmin,digits)) cat("\nmean percent general = ",round(mp2,digits), " with sd = ", round(sqrt(vp2),digits), "and cv of ",round(sqrt(vp2)/mp2,digits),"\n") cat("\n The orthogonal loadings were \n") load <- x$orthog cut <- 0 #but, if we are print factors of covariance matrices, they might be very small # cut <- min(cut,max(abs(load))/2) #removed following a request by Reinhold Hatzinger nitems <- dim(load)[1] nfactors <- dim(load)[2] loads <- data.frame(item=seq(1:nitems),cluster=rep(0,nitems),unclass(load)) u2.order <- 1:nitems #used if items are sorted if(sort) { #first sort them into clusters #first find the maximum for each row and assign it to that cluster loads$cluster <- apply(abs(load),1,which.max) ord <- sort(loads$cluster,index.return=TRUE) loads[1:nitems,] <- loads[ord$ix,] rownames(loads)[1:nitems] <- rownames(loads)[ord$ix] #now sort column wise #now sort the loadings that have their highest loading on each cluster items <- table(loads$cluster) #how many items are in each cluster? first <- 1 item <- loads$item for (i in 1:length(items)) {# i is the factor number if(items[i] > 0 ) { last <- first + items[i]- 1 ord <- sort(abs(loads[first:last,i+2]),decreasing=TRUE,index.return=TRUE) u2.order[first:last] <- item[ord$ix+first-1] loads[first:last,3:(nfactors+2)] <- load[item[ord$ix+first-1],] loads[first:last,1] <- item[ord$ix+first-1] rownames(loads)[first:last] <- rownames(loads)[ord$ix+first-1] first <- first + items[i] } } } #end of sort #they are now sorted, don't print the small loadings if cut > 0 ncol <- dim(loads)[2]-2 rloads <- round(loads,digits) fx <- format(rloads,digits=digits) nc <- nchar(fx[1,3], type = "c") fx.1 <- fx[,1,drop=FALSE] #drop = FALSE preserves the rownames for single factors fx.2 <- fx[,3:(2+ncol)] load.2 <- as.matrix(loads[,3:(ncol+2)]) fx.2[abs(load.2) < cut] <- paste(rep(" ", nc), collapse = "") if(sort) { fx <- data.frame(V=fx.1,fx.2) if(dim(fx)[2] <3) colnames(fx) <- c("V",colnames(x$loadings)) #for the case of one factor } else {fx <- data.frame(fx.2) colnames(fx) <- colnames(x$orthog)} if(nfactors > 1) {if(is.null(x$Phi)) {h2 <- rowSums(load.2^2)} else {h2 <- diag(load.2 %*% x$Phi %*% t(load.2)) }} else {h2 <-load.2^2} if(!is.null(x$uniquenesses)) {u2 <- x$uniquenesses[u2.order]} else {u2 <- u2[u2.order]} #h2 <- round(h2,digits) vtotal <- sum(h2 + u2) if(isTRUE(all.equal(vtotal,nitems))) { cat("Standardized loadings based upon correlation matrix\n") print(cbind(fx,h2,u2),quote="FALSE",digits=digits) } else { cat("Unstandardized loadings based upon covariance matrix\n") print(cbind(fx,h2,u2,H2=h2/(h2+u2),U2=u2/(h2+u2)),quote="FALSE",digits=digits)} #adapted from print.loadings if(is.null(x$Phi)) {if(nfactors > 1) {vx <- colSums(load.2^2) } else {vx <- sum(load.2^2) }} else {vx <- diag(x$Phi %*% t(load) %*% load) } names(vx) <- colnames(x$orthog) varex <- rbind("SS loadings" = vx) varex <- rbind(varex, "Proportion Var" = vx/vtotal) if (nfactors > 1) varex <- rbind(varex, "Cumulative Var"= cumsum(vx/vtotal)) cat("\n") print(round(varex, digits)) if(!is.null(x$dof)) {cat("\nThe degrees of freedom are",x$dof," and the fit is ",round(x$objective,digits),"\n") if(!is.null(x$n.obs)&&!is.na(x$n.obs)) {cat("The number of observations was ",x$n.obs, " with Chi Square = ",round(x$STATISTIC,digits), " with prob < ", signif(x$PVAL,digits))} } if(!is.null(x$rms)) {cat("\nThe root mean square of the residuals is ", round(x$rms,digits),"\n") } if(!is.null(x$crms)) {cat("The df corrected root mean square of the residuals is ", round(x$crms,digits)) } if(!is.null(x$RMSEA)) {cat("\nRMSEA index = ",round(x$RMSEA[1],digits+1), " and the", (1- x$RMSEA[4])*100,"% confidence intervals are ",round(x$RMSEA[2:3],digits+1)) } if(!is.null(x$BIC)) {cat("\nBIC = ",round(x$BIC,digits))} } psych/R/manhattan.R0000644000176200001440000001025514521203722013700 0ustar liggesusers#Developed 12/1/18 to be part of bestScales approach "manhattan" <- function(x,criteria=NULL,keys=NULL,raw=TRUE,n.obs=NULL,abs=TRUE,ylab=NULL,labels=NULL,log.p=FALSE,ci=.05,pch=21,main="Manhattan Plot of",adjust="holm",ylim=NULL,digits=2,dictionary=NULL, ...) { if(is.null(ylab) ) {if(log.p) {ylab = "- log(10) p of " } else {ylab <- "Correlations with " } } #There are two basic cases: #1) a correlation matrix is provided, and the number of variables is nrow(x) (raw == FALSE) #2) a data matrix is provided and the number of variables is ncol(x) (raw == TRUE, the default) pt.col <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7") n.col <- 8 nvar <- ncol(x) if(!is.null(dim(criteria))| (length(criteria) == NROW(x))) { x <- cbind(x,criteria) criteria <- colnames(criteria)} else { if(length(criteria) > 1 ) {criteria <- (criteria) } else {criteria <- "criteria"} } if(raw) {n.obs <- nrow(x) #nvar <- ncol(x) #pt.col <- rainbow(n.col) #here is one way of doing it #these next color are from GGplot2 and are said to color blind friendly if(!is.null(keys) ) {key.ord <- selectFromKeys(keys) num.keys <- length(keys) for (i in 1:num.keys) { select <- sub("-", "", unlist(keys[i])) keys[[i]]<- select} if(is.null(labels)) labels <- names(keys) } else {key.ord <- colnames(x)[1:nvar] } if(is.null(dim(criteria)) ) {x <- x[c(key.ord,criteria)] } else {x <- x[key.ord]} nvar <- length(key.ord) # if(!is.null(dim(criteria))| (length(criteria) == NROW(x))) { x <- cbind(x,criteria) # if(length(criteria) > 1 ){criteria <- names(criteria) } else {criteria <- "criteria"} # } n.crit <- length(criteria) } if(isCorrelation(x) | !raw) {r <- x # case 1 raw <- FALSE nvar <- NROW(x) if(log.p) {if(is.null(n.obs)) {n.obs <- 1000 message("\nn.obs not specified, arbitrarily set to 1000")} } if(!is.null(keys) ) {key.ord <- selectFromKeys(keys) num.keys <- length(keys) for (i in 1:num.keys) { select <- sub("-", "", unlist(keys[i])) keys[[i]]<- select} if(is.null(labels)) {labels <- names(keys)} else {if(labels=="none") {labels =rep("",num.keys)}} } else {key.ord <- rownames(x)[1:nvar] } } else { #case 2 y <- x[,criteria] R <- corr.test(x,y,adjust=adjust,ci=FALSE) #can we make this find polychorics and biserials r <- R$r colnames(r) <- criteria x <- as.matrix(x) raw <- TRUE} n.crit <- length(criteria) if(abs) r <- abs(r) if(log.p) {if(!raw) {r <- -log10(corr.p(r,n.obs,adjust=adjust,ci=FALSE)$p)} else { r <- - log10(R$p)} temp.p <- r temp.p[is.infinite(r)] <- NA min.p <- min(temp.p,na.rm=TRUE) r[is.infinite(r)] <-min.p } if(is.null(ylim)) ylim <- c(min(r[1:nvar,],na.rm=TRUE),max(r[1:nvar,],na.rm=TRUE)) for(i in 1:n.crit) { if(is.null(keys)) { plot(r[1:nvar,i],main=paste(main,criteria[i]),ylab=paste(ylab,criteria[i]), ylim=ylim,pch=21,xlab="",xaxt="n",...) axis(1, at =1:nvar, labels = labels,las=2) } else { plot(NA,xlim=c(1,num.keys),ylim=ylim,ylab=paste(ylab,criteria[i]),main= paste(main,criteria[i]),xlab="",xaxt="n",...) axis(1, at =1:num.keys, labels = labels,las=2) for (k in 1:num.keys) { xloc <- rep(k,length(keys[[k]])) # points(xloc,r[keys[[k]],i],pch=pch,bg=c("black","blue","red")[k %% 3 + 1],...) points(xloc,r[keys[[k]],i],pch=pch,bg=pt.col[k %% n.col + 1],...) } } if(!is.null(ci)) { dif.corrected <- qnorm(1 - ci/(nvar * n.crit)) se <- 1/sqrt(n.obs - 3) upper <- fisherz2r(dif.corrected * se) abline(h=upper,lty="dashed") if(!abs) {abline(h=0) abline(h=-upper,lty="dashed")} } } if(raw) {r <- r[1:nvar,]} else {r <- r[1:nvar,criteria] } if(!is.null(dictionary)) { contents <- lookup(rownames(r),dictionary) results <- data.frame(round(r,digits=digits)) results <- merge(results,contents,by="row.names",all.x=TRUE,sort=FALSE) rownames(results) <- results[,"Row.names"] results <- results[-1] #now put it back into the correct order r <- results } invisible(r) }psych/R/ICC.R0000644000176200001440000001537114210441575012334 0ustar liggesusers#revised March, 9, 2018 to include lmer option #this makes it much faster for large data sets, particularly with missing data "ICC" <- function(x,missing=TRUE,alpha=.05,lmer=TRUE,check.keys=FALSE) { cl <- match.call() if(is.matrix(x)) x <- data.frame(x) n.obs.original <- dim(x)[1] if(missing &!lmer) { x1 <- try(na.fail(x)) if(inherits(x1, as.character("try-error"))) { x <- na.omit(x) n.obs <- dim(x)[1] stop("missing data were found for ",n.obs.original -n.obs, " cases \n Try again with na.omit or set missing= FALSE and proceed at your own risk, or try the lmer=TRUE option")}} n.obs <- dim(x)[1] if(n.obs < n.obs.original) message("Warning, missing data were found for ",n.obs.original -n.obs, " cases") # n.obs <- NROW(x) items <- colnames(x) n.items <- NCOL(x) if(check.keys) { min.item <- min(x[items],na.rm=TRUE) max.item <- max(x[items],na.rm=TRUE) p1 <- pca(x) keys <- rep(1,n.items) if(any(p1$loadings < 0)) { message("Some items were negatively correlated with total scale and were automatically reversed.\n This is indicated by a negative sign for the variable name.") keys[p1$loadings < 0] <- -1 } if(any(keys < 0)) { # #then find the x and y scores newx <- t(t(x) * keys + (keys < 0)* (max.item + min.item) ) #these are now rescaled in the keyed direction names(keys) <-colnames(x) cat("\ reversed items\n ",names(keys)[keys < 0]) x <- as.data.frame(newx) } else { message("Some items were negatively correlated with total scale. To correct this problem, run again with check.keys=TRUE") } } # nj <- dim(x)[2] x.s <- stack(x) x.df <- data.frame(x.s,subs=rep(paste("S",1:n.obs,sep=""),nj)) #choose to either do lmer or aov if(lmer) { #lmer colnames(x.df ) <- c("values","items","id") #this makes it simpler to understand mod.lmer <- lme4::lmer(values ~ 1 + (1 | id) + (1 | items), data=x.df,na.action=na.omit) vc <- lme4::VarCorr(mod.lmer) MS_id <- vc$id[1,1] MS_items <- vc$items[1,1] MSE <- error <- MS_resid <- (attributes(vc)$sc)^2 MS.df <- data.frame(variance= c(MS_id ,MS_items, MS_resid,NA)) rownames(MS.df) <- c("ID","Items", "Residual","Total") MS.df["Total",] <- sum(MS.df[1:3,1],na.rm=TRUE) MS.df["Percent"] <- MS.df/MS.df["Total",1] lmer.MS <- MS.df #save these #convert to AOV equivalents #changed to a cleaner form 8/18/21 MSB <- nj * MS_id + error MSJ <- n.obs * MS_items + error MSW <- error + MS_items stats <- matrix(NA,ncol=3,nrow=5) #create an anova equivalent table from the lmer results dfB <- n.obs -1 dfJ <- nj - 1 dfE <- ( n.obs -1) * ( nj - 1) FB <- MSB/MSE FJ <- MSJ/MSE stats[1,] <- c(dfB,dfJ,dfE) stats[2,] <- c( MSB *( n.obs -1),MSJ *(nj -1),MSE * (n.obs-1) *(nj-1)) stats[3,] <- c(MSB, MSJ,MSE) stats[4,] <- c(MSB/MSE, MSJ/MSE, NA) stats[5,] <- c(-expm1(pf(FB,dfB,dfE,log.p=TRUE)), -expm1(pf(FJ,dfJ,dfE,log.p=TRUE)) , NA) s.aov <- mod.lmer } else { #AOV aov.x <- aov(values~subs+ind,data=x.df) s.aov <- summary(aov.x) stats <- matrix(unlist(s.aov),ncol=3,byrow=TRUE) MSB <- stats[3,1] MSW <- (stats[2,2] + stats[2,3])/(stats[1,2] + stats[1,3]) MSJ <- stats[3,2] MSE <- stats[3,3] MS.df <- NULL } colnames(stats) <- c("subjects","Judges", "Residual") rownames(stats) <- c("df","SumSq","MS","F","p") #McGraw and Wong use MSr for MSB ICC1 <- (MSB- MSW)/(MSB+ (nj-1)*MSW) ICC2 <- (MSB- MSE)/(MSB + (nj-1)*MSE + nj*(MSJ-MSE)/n.obs) ICC3 <- (MSB - MSE)/(MSB+ (nj-1)*MSE) ICC12 <- (MSB-MSW)/(MSB) #MW case 2 ICC22 <- (MSB- MSE)/(MSB +(MSJ-MSE)/n.obs) ICC32 <- (MSB-MSE)/MSB #MW Case 3a #find the various F values from Shrout and Fleiss F11 <- MSB/MSW df11n <- n.obs-1 df11d <- n.obs*(nj-1) p11 <- -expm1(pf(F11,df11n,df11d,log.p=TRUE)) F21 <- MSB/MSE df21n <- n.obs-1 df21d <- (n.obs-1)*(nj-1) # p11 <- 1-pf(F11,df11n,df11d) # p21 <- 1-pf(F21,df21n,df21d) p21 <- - expm1(pf(F21,df21n,df21d,log.p=TRUE)) F31 <- F21 # results <- t(results) results <- data.frame(matrix(NA,ncol=8,nrow=6)) colnames(results ) <- c("type", "ICC","F","df1","df2","p","lower bound","upper bound") rownames(results) <- c("Single_raters_absolute","Single_random_raters","Single_fixed_raters", "Average_raters_absolute","Average_random_raters","Average_fixed_raters") results[,1] <- c("ICC1","ICC2","ICC3","ICC1k","ICC2k","ICC3k") results[,2] <- c(ICC1,ICC2,ICC3,ICC12,ICC22,ICC32) results[,3] <- c(F11,F21,F21,F11,F21,F21) results[,4] <- c(df11n,df21n,df21n,df11n,df21n,df21n ) results[,5] <- c(df11d, df21d, df21d, df11d, df21d, df21d ) results[,6] <- c(p11 ,p21, p21, p11 ,p21,p21) # results[1,1] = "ICC1" # results[2,1] = "ICC2" # results[3,1] = "ICC3" # results[4,1] = "ICC1k" # results[5,1] = "ICC2k" # results[6,1] = "ICC3k" # results[1,2] = ICC1 # results[2,2] = ICC2 # results[3,2] = ICC3 # results[4,2] = ICC12 # results[5,2] = ICC22 # results[6,2] = ICC32 # results[1,3] <- results[4,3] <- F11 # results[2,3] <- F21 # results[3,3] <- results[6,3] <- results[5,3] <- # results[5,3] <- F21 # results[1,4] <- results[4,4] <- df11n #results[4,5] <- df11d # results[2,4] <- results[3,4] <- results[5,4] <- results[6,4] <- df21n # results[2,5] <- results[3,5] <- results[5,5] <- results[6,5] <- df21d # results[2,6] <- results[5,6] <- results[3,6] <-results[6,6] <- p21 #now find confidence limits #first, the easy ones #don't divide alpha level by 2 (changed on 2/1/14) #fixed again? onf 5/21/19 #and fixed again to divide alpha by 2 following Mijke Rhemtulla's thoughtful comment F1L <- F11 / qf(1-alpha/2,df11n,df11d) F1U <- F11 * qf(1-alpha/2,df11d,df11n) L1 <- (F1L-1)/(F1L+(nj-1)) U1 <- (F1U -1)/(F1U+nj-1) F3L <- F31 /qf(1-alpha/2,df21n,df21d) F3U <- F31 * qf(1-alpha/2,df21d,df21n) results[1,7] <- L1 results[1,8] <- U1 results[3,7] <- (F3L-1)/(F3L+nj-1) results[3,8] <- (F3U-1)/(F3U+nj-1) results[4,7] <- 1- 1/F1L results[4,8] <- 1- 1/F1U results[6,7] <- 1- 1/F3L results[6,8] <- 1 - 1/F3U #the hard one is case 2 Fj <- MSJ/MSE vn <- (nj-1)*(n.obs-1)* ( (nj*ICC2*Fj+n.obs*(1+(nj-1)*ICC2) - nj*ICC2))^2 vd <- (n.obs-1)*nj^2 * ICC2^2 * Fj^2 + (n.obs *(1 + (nj-1)*ICC2) - nj*ICC2)^2 v <- vn/vd F3U <- qf(1-alpha/2,n.obs-1,v) F3L <- qf(1-alpha/2,v,n.obs-1) L3 <- n.obs *(MSB- F3U*MSE)/(F3U*(nj*MSJ+(nj*n.obs-nj-n.obs)*MSE)+ n.obs*MSB) results[2,7] <- L3 U3 <- n.obs *(F3L * MSB - MSE)/(nj * MSJ + (nj * n.obs - nj - n.obs)*MSE + n.obs * F3L * MSB) results[2,8] <- U3 L3k <- L3 * nj/(1+ L3*(nj-1)) U3k <- U3 * nj/(1+ U3*(nj-1)) results[5,7] <- L3k results[5,8] <- U3k #clean up the output # results[,2:8] <- results[,2:8] result <- list(results=results,summary=s.aov,stats=stats,MSW=MSW,lme = MS.df,Call=cl,n.obs=n.obs,n.judge=nj) class(result) <- c("psych","ICC") return(result) } psych/R/ICLUST.sort.R0000644000176200001440000000566012213116103013712 0ustar liggesusers"iclust.sort"<- function (ic.load,cut=0,labels=NULL,keys=FALSE, clustsort=TRUE) {ICLUST.sort(ic.load,labels,keys,clustsort)} "ICLUST.sort"<- function (ic.load,cut=0,labels=NULL,keys=FALSE, clustsort=TRUE) { if(is.matrix(ic.load)) {loadings <- ic.load pattern <- as.matrix(loadings)} else { loadings <- ic.load$loadings pattern <- as.matrix(ic.load$pattern)} nclust <- dim(loadings)[2] nitems <- dim(loadings)[1] loadings <- as.matrix(loadings) #just in case there is just one cluster loadings <- unclass(loadings) #to get around the problem of a real loading matrix if(nclust > 1) {eigenvalue <- diag(t(pattern)%*% loadings) #put the clusters into descending order by eigenvalue evorder <- order(eigenvalue,decreasing=TRUE) if(clustsort) loadings <- loadings[,evorder] #added the clustsort option 2011.12.22 until now had always sorted } if (length(labels)==0) { var.labels <- rownames(loadings)} else {var.labels=labels} if (length(var.labels)==0) {var.labels =paste('V',seq(1:nitems),sep='')} #unlabled variables loads <- data.frame(item=seq(1:nitems),content=var.labels,cluster=rep(0,nitems),loadings) #first find the maximum for each row and assign it to that cluster loads$cluster <- apply(abs(loadings),1,which.max) for (i in 1:nitems) {if (abs(loadings[i,loads$cluster[i]]) < cut) {loads$cluster[i] <- nclust+1}} #assign the ones that missed the cut a location ord <- sort(loads$cluster,index.return=TRUE) loads[1:nitems,] <- loads[ord$ix,] rownames(loads)[1:nitems] <- rownames(loads)[ord$ix] items <- c(table(loads$cluster),1) #how many items are in each cluster? if(length(items) < (nclust+1)) {items <- rep(0,(nclust+1)) #this is a rare case where some clusters don't have anything in them for (i in 1:nclust+1) {items[i] <- sum(loads$cluster==i) } } #now sort the loadings that have their highest loading on each cluster first <- 1 for (i in 1:nclust) { if(items[i]>0 ) { last <- first + items[i]- 1 ord <- sort(abs(loads[first:last,i+3]),decreasing=TRUE,index.return=TRUE) loads[first:last,] <- loads[ord$ix+first-1,] rownames(loads)[first:last] <- rownames(loads)[ord$ix+first-1] first <- first + items[i]} } if (first < nitems) loads[first:nitems,"cluster"] <- 0 #assign items less than cut to 0 if(keys) {result <- list(sorted=loads,clusters=factor2cluster(loadings))} else result <- list(sorted=loads) class(result) <- c("psych","iclust.sort") #need to clean up print to make this work return(result) } #revised August 8, 2007 to add cluster keying option and to allow us to work with factor analysis output #revised Sept 15, 2007 to remove the "loadings" parameter #revised Ausgust 30, 2008 to make class psych #revised August 28, 2012 to meet a request from Gundmundur Arnkelsson to be able to print from principal output. psych/R/plot.psych.R0000644000176200001440000000717214444461115014042 0ustar liggesusers"plot.psych" <- function(x,labels=NULL,...) { result <- NULL vss <- iclust <- omega <- fa <- irt.fa <- irt.poly <- principal <- parallel <- set.cor <- residuals <- reliability <- FALSE if(length(class(x)) > 1) { #This next section was added December 7, 2019 to change from class(x)[2] to inherits(x, ...) names <- cs(irt.fa,irt.poly,vss,iclust,fa, principal,omega,parallel,set.cor,residuals,reliability) value <- inherits(x,names,which=TRUE) # value <- class(x)[2] if(any(value > 1) ) { value <- names[which(value > 0)]} else {value <- "None"} } else {stop ("I am sorry, this is not an object I know how to plot")} switch(value, vss = { n=dim(x) symb=c(49,50,51,52) #plotting sym plot(x$cfit.1,ylim=c(0,1),type="b",ylab="Very Simple Structure Fit",xlab="Number of Factors",pch=49) x$cfit.3<- x$vss.stats$cfit.3 x$cfit.4<- x$vss.stats$cfit.4 title <- x$title title(main=title) x$cfit.2[1]<-NA x$cfit.3[1]<-NA x$cfit.3[2]<-NA x$cfit.4[1]<-NA x$cfit.4[2]<-NA x$cfit.4[3]<-NA lines(x$cfit.2) points(x$cfit.2,pch=50) lines(x$cfit.3) points(x$cfit.3,pch=symb[3]) lines(x$cfit.4) points(x$cfit.4,pch=symb[4]) }, iclust = { op <- par(no.readonly = TRUE) # the whole list of settable par's. cut <- 0 # if(iclust) { load <- x$loadings cat("Use ICLUST.diagram to see the hierarchical structure\n") # } else {load <- x$schmid$orthog # cat("Use omega.diagram to see the hierarchical structure\n") } nc <- dim(load)[2] nvar <- dim(load)[1] ch.col=c("black","blue","red","gray","black","blue","red","gray") cluster <- rep(nc+1,nvar) cluster <- apply( abs(load) ,1,which.max) cluster[(apply(abs(load),1,max) < cut)] <- nc+1 if (nc > 2 ) { pairs(load,pch = cluster+19,col=ch.col[cluster],bg=ch.col[cluster]) } else { plot(load,pch = cluster+20,col=ch.col[cluster],bg=ch.col[cluster],...) abline(h=0) abline(v=0)} if(is.null(labels)) labels <- paste(1:nvar) if(nc <3) text(load,labels,pos=1) par(op) }, omega = { op <- par(no.readonly = TRUE) # the whole list of settable par's. cut <- 0 if(iclust) { load <- x$loadings cat("Use ICLUST.diagram to see the hierarchical structure\n") } else {load <- x$schmid$orthog cat("Use omega.diagram to see the hierarchical structure\n") } nc <- dim(load)[2] nvar <- dim(load)[1] ch.col=c("black","blue","red","gray","black","blue","red","gray") cluster <- rep(nc+1,nvar) cluster <- apply( abs(load) ,1,which.max) cluster[(apply(abs(load),1,max) < cut)] <- nc+1 if (nc > 2 ) { pairs(load,pch = cluster+19,cex=1.5,col=ch.col[cluster],bg=ch.col[cluster]) } else { plot(load,pch = cluster+20,col=ch.col[cluster],bg=ch.col[cluster],...) abline(h=0) abline(v=0)} if(is.null(labels)) labels <- paste(1:nvar) if(nc <3) text(load,labels,pos=1) par(op) }, set.cor ={ plot(x$cancor2,typ="b",ylab="Squared Canonical Correlation",xlab="Canonical variate",main="Scree of canonical correlations" ,ylim=c(0,1),...)}, lmCor ={ plot(x$cancor2,typ="b",ylab="Squared Canonical Correlation",xlab="Canonical variate",main="Scree of canonical correlations" ,ylim=c(0,1),...)}, irt.fa = {result <- plot.irt(x,labels=labels,...)}, irt.poly = { result <- plot.poly(x,labels=labels,...)}, fa = {fa.plot(x,labels=labels,...)}, principal = {fa.plot(x,labels=labels,...)}, parallel = {plot_fa.parallel(x,...)}, reliability = {plot.reliability(x,...)}, residuals = {plot.residuals(x,...)}, none = {stop ("I am sorry, this is not an object I know how to plot")} ) if(!is.null(result)) {class(result) <- c("psych","polyinfo") invisible(result)} }psych/R/esem.diagram.R0000644000176200001440000005157214547537207014310 0ustar liggesusers#adapted from my structure.diagram function to draw the output from esem #August 4, 2016 "esemDiagram" <- "esem.diagram" <- function(esem=NULL,labels=NULL,cut=.3,errors=FALSE,simple=TRUE,regression=FALSE,lr=TRUE, digits=1,e.size=.1,adj=2,main="Exploratory Structural Model", ...){ cancor <- FALSE #a helper function sort.f <- function(x) { nvar <- ncol(x) if(is.null(nvar)) {return(x)} else { nitem <- nrow(x) cluster <- data.frame(item <- seq(1:nitem),clust = rep(0,nitem)) cluster$clust <- apply(abs(x),1,which.max) ord <- sort(cluster$clust,index.return=TRUE) x[1:nitem,] <- x[ord$ix,] rownames(x) <- rownames(x)[ord$ix] return(x)}} #first some default values if(length(class(esem)) > 1) { names <- cs(lm, lavaan,fa,principal, esem,mediate, lmCor, setCor, fa.multi,lm) value <- inherits(esem,names,which=TRUE) # value <- class(x)[2] if(any(value > 0) ) { value <- names[which(value > 0)]} else {value <- "other"} if(value=="lmCor" ) {cancor<- TRUE} else {cancor<- FALSE} } xmodel <- sort.f(esem$loadsX) ymodel <- sort.f(esem$loadsY) Phi <- esem$Phi num.y <- num.x <- 0 #we assume there is nothing there vars<- NULL num.xvar <- dim(xmodel)[1] #how many x variables? num.yvar <- dim(ymodel)[1] num.xfactors <- dim(xmodel)[2] num.yfactors <- dim(ymodel)[2] if(is.null(num.xvar)) num.xvar <- length(xmodel) if(is.null(num.yvar)) num.yvar <- length(ymodel) if(is.null(num.xfactors)) num.xfactors <- 1 if(is.null(num.yfactors)) num.yfactors <- 1 # if(max(num.xvar,num.yvar) < 10) e.size <- e.size* 2 #make the ellipses bigger for small problems e.size <- e.size * 10/ max(num.xvar,num.yvar) if(is.null(labels)) { xvars <- rownames(xmodel)} else { xvars <- vars <- labels} if(is.null(ncol(xmodel))) xvars <- names(xmodel) # if(is.null(vars) ) {xvars <- paste0("x",1:num.xvar) } fact <- colnames(xmodel) if (is.null(fact)) { fact <- paste0("X",1:num.xfactors) } if(is.null(ncol(ymodel))) {yvars <-names(ymodel) } else {yvars <- rownames(ymodel)} if(is.null(yvars)) {yvars <- paste0("y",1:num.y) } if(is.null(labels)) {vars <- c(xvars,yvars)} else {yvars <- labels[(num.xvar+1):(num.xvar+num.y)]} yfact <- colnames(ymodel) if(is.null(yfact)) {yfact <- paste0("Y",1:num.yfactors) } fact <- c(fact,yfact) num.var <- num.xvar + num.y num.factors <- num.xfactors + num.yfactors sem <- matrix(rep(NA),6*(num.var*num.factors + num.factors),ncol=3) #this creates an output model for sem analysis lavaan <- vector("list",num.xfactors + num.yfactors) #create a list for lavaan colnames(sem) <- c("Path","Parameter","Value") var.rect <- list() fact.rect <- list() # length.labels <- 0 # a filler for now #plot.new() is necessary if we have not plotted before #strwd <- try(strwidth(xvars),silent=TRUE) strwd <- try(length.labels <- max(strwidth(xvars),strwidth(yvars),strwidth("abc"))/1.8,silent=TRUE) #although this throws an error if the window is not already open, we don't show it #if (class(strwd) == "try-error" ) {plot.new() } if (inherits(strwd, "try-error" )) {length.labels = max(nchar(xvars),3)/1.8 } #length.labels <- max(strwidth(xvars),strwidth("abc"))/1.8 if(lr) {limx <- c(-(length.labels),max(num.xvar,num.yvar)+3 ) limy <- c(0,max(num.xvar,num.yvar)+1) } else { limy <- c(-(length.labels),max(num.xvar,num.yvar) +3 ) limx <- c(0,max(num.xvar,num.yvar)+1) if( errors) limy <- c(-1,max(num.xvar,num.yvar)+2)} scale.xaxis <- 3 if(lr) {plot(0,type="n",xlim=limx,ylim=limy,frame.plot=FALSE,axes=FALSE,ylab="",xlab="",main=main)} else {plot(0,type="n",xlim=limx,ylim=limy,frame.plot=FALSE,axes=FALSE,ylab="",xlab="",main=main) } #now draw the x part k <- num.factors x.adjust <- num.xvar/ max(num.xvar,num.yvar) for (v in 1:num.xvar) { if(lr) { var.rect[[v]] <- dia.rect(0,(num.xvar-v+1)/x.adjust,xvars[v],xlim=limx,ylim=limy,...) } else { var.rect[[v]] <- dia.rect(v,0,xvars[v],xlim=limy,ylim=limx,...) } } nvar <- num.xvar f.scale <- (num.xvar+ 1)/(num.xfactors+1)/x.adjust factors <- round(xmodel,digits) if(is.null(ncol(factors))) factors <- matrix(factors) if (num.xfactors >0) { for (f in 1:num.xfactors) { if(!regression) {if(lr) {fact.rect[[f]] <- dia.ellipse(limx[2]/scale.xaxis,(num.xfactors+1-f)*f.scale,fact[f],xlim=c(0,nvar),ylim=c(0,nvar),e.size=e.size,...)} else {fact.rect[[f]] <- dia.ellipse(f*f.scale,limy[2]/scale.xaxis,fact[f],ylim=c(0,nvar),xlim=c(0,nvar),e.size=e.size,...) } } else {if(lr) {fact.rect[[f]] <- dia.rect(limx[2]/scale.xaxis,(num.xfactors+1-f)*f.scale,fact[f],xlim=c(0,nvar),ylim=c(0,nvar),...)} else { fact.rect[[f]] <- dia.rect(f*f.scale,limy[2]/scale.xaxis,fact[f],xlim=c(0,nvar),ylim=c(0,nvar),...)} } for (v in 1:num.xvar) { if(is.numeric(factors[v,f])) { if(simple && (abs(factors[v,f]) == max(abs(factors[v,])) ) && (abs(factors[v,f]) > cut) | (!simple && (abs(factors[v,f]) > cut))) { if (!regression) {if(lr){dia.arrow(from=fact.rect[[f]],to=var.rect[[v]]$right,labels =factors[v,f],col=((sign(factors[v,f])<0) +1),lty=((sign(factors[v,f])<0) +1),adj=f %% adj +1) } else {dia.arrow(from=fact.rect[[f]],to=var.rect[[v]]$top,labels =factors[v,f],col=((sign(factors[v,f])<0) +1),lty=((sign(factors[v,f])<0) +1)) } } else {dia.arrow(to=fact.rect[[f]]$left,from=var.rect[[v]]$right,labels =factors[v,f],col=((sign(factors[v,f])<0) +1))} } } else { if (factors[v,f] !="0") { if (!regression) { if(lr) {dia.arrow(from=fact.rect[[f]],to=var.rect[[v]]$right,labels =factors[v,f]) } else {dia.arrow(from=fact.rect[[f]],to=var.rect[[v]]$top,labels =factors[v,f])} } else {if(lr) {dia.arrow(to=fact.rect[[f]],from=var.rect[[v]]$right,labels =factors[v,f])} else {dia.arrow(to=fact.rect[[f]],from=var.rect[[v]]$top,labels =factors[v,f])} } } } } } if (num.xfactors ==1) { lavaan[[1]] <- paste(fact[1],"=~ ") for(i in 1:num.xvar) { sem[i,1] <- paste(fact[1],"->",vars[i],sep="") lavaan[[1]] <- paste0(lavaan[[1]], ' + ', vars[i]) if(is.numeric(factors[i])) {sem[i,2] <- vars[i]} else {sem[i,2] <- factors[i] } }} #end of if num.xfactors ==1 k <- num.xvar+1 k <- 1 for (f in 1:num.xfactors) { #if (!is.numeric(factors[i,f]) || (abs(factors[i,f]) > cut)) lavaan[[f]] <- paste0(fact[f] ," =~ ") for (i in 1:num.xvar) { if((!is.numeric(factors[i,f] ) && (factors[i,f] !="0"))|| ((is.numeric(factors[i,f]) && abs(factors[i,f]) > cut ))) { sem[k,1] <- paste(fact[f],"->",vars[i],sep="") lavaan[[f]] <- paste0(lavaan[[f]], ' + ', vars[i]) if(is.numeric(factors[i,f])) {sem[k,2] <- paste("F",f,vars[i],sep="")} else {sem[k,2] <- factors[i,f]} k <- k+1 } #end of if } } } #end of if num.xfactors >0 if(errors) { for (i in 1:num.xvar) {if(lr) { dia.self(var.rect[[i]],side=2) } else { dia.self(var.rect[[i]],side=1)} sem[k,1] <- paste(vars[i],"<->",vars[i],sep="") sem[k,2] <- paste("x",i,"e",sep="") k <- k+1 } } #do it for y model if(!is.null(ymodel)) { if(lr) { y.adj <- num.yvar/ max(num.xvar,num.yvar) #y.adj <- num.yvar/2 - num.xvar/2 f.yscale <- limy[2]/(num.yfactors+1) y.fadj <- 0} else { # y.adj <- num.xvar/2 - num.yvar/2 y.adj <- num.yvar/2 f.yscale <- limx[2]/(num.yfactors+1) y.fadj <- 0} for (v in 1:num.yvar) { if(lr){ var.rect[[v+num.xvar]] <- dia.rect(limx[2]-.35,limy[2]-v /y.adj,yvars[v],xlim=limx,ylim=limy,...)} else { var.rect[[v+num.xvar]] <- dia.rect(v / y.adj,limx[2],yvars[v],xlim=limy,ylim=limx,...)} } } #we have drawn the y variables, now should we draw the Y factors if(!is.null(ymodel)){ y.factors <- round(ymodel,digits) if(is.null(ncol(y.factors))) { y.factors <- matrix(y.factors) } num.y <- nrow(y.factors) for (f in 1:num.yfactors) {if(lr) { fact.rect[[f+num.xfactors]] <- dia.ellipse(2*limx[2]/scale.xaxis,(num.yfactors+1-f)*f.yscale +y.fadj,yfact[f],xlim=c(0,nvar),ylim=c(0,nvar),e.size=e.size,...)} else { fact.rect[[f+num.xfactors]] <- dia.ellipse(f*f.yscale+ y.fadj,2*limx[2]/scale.xaxis,yfact[f],ylim=c(0,nvar),xlim=c(0,nvar),e.size=e.size,...)} for (v in 1:num.yvar) {if(is.numeric(y.factors[v,f])) { {if(simple && (abs(y.factors[v,f]) == max(abs(y.factors[v,])) ) && (abs(y.factors[v,f]) > cut) | (!simple && (abs(factors[v,f]) > cut))) { if(lr) { dia.arrow(from=fact.rect[[f+num.xfactors]],to=var.rect[[v+num.xvar]]$left,labels =y.factors[v,f],col=((sign(y.factors[v,f])<0) +1),lty=((sign(factors[v,f])<0) +1),adj = f %% adj +1)} else { dia.arrow(from=fact.rect[[f+num.xfactors]],to=var.rect[[v+num.xvar]]$bottom,labels =y.factors[v,f],col=((sign(y.factors[v,f])<0) +1),lty=((sign(factors[v,f])<0) +1))} } } } else {if(factors[v,f] !="0") {if(lr) {dia.arrow(from=fact.rect[[f+num.xfactors]],to=var.rect[[v+num.xvar]]$left,labels =y.factors[v,f] ,lty=((sign(factors[v,f])<0) +1))} else { dia.arrow(from=fact.rect[[f+num.xfactors]],to=var.rect[[v+num.xvar]]$bottom,labels =y.factors[v,f],lty=((sign(factors[v,f])< 0) +1)) } } }} } if (num.yfactors ==1) { lavaan[[num.xfactors +1 ]] <- paste(fact[num.xfactors +1], "=~") for (i in 1:num.y) { sem[k,1] <- paste(fact[1+num.xfactors],"->",yvars[i],sep="") lavaan[[num.xfactors +1]] <- paste0(lavaan[[num.xfactors +1]], ' + ', yvars[i]) if(is.numeric(y.factors[i] ) ) {sem[k,2] <- paste("Fy",yvars[i],sep="")} else {sem[k,2] <- y.factors[i]} k <- k +1 } } else { #end of if num.yfactors ==1 for (i in 1:num.y) { for (f in 1:num.yfactors) { lavaan[[num.xfactors +f ]] <- paste(fact[num.xfactors +f], "=~") if( abs(y.factors[i,f]) > cut ) { sem[k,1] <- paste(fact[f+num.xfactors],"->",vars[i+num.xvar],sep="") lavaan[[num.xfactors +f]] <- paste0(lavaan[[num.xfactors +f]], ' + ', yvars[i]) if(is.numeric(y.factors[i,f])) { sem[k,2] <- paste("Fy",f,vars[i+num.xvar],sep="")} else {sem[k,2] <- y.factors[i,f]} k <- k+1 } #end of if } #end of factor } # end of variable loop } #end of else if # } if(errors) { for (i in 1:num.y) { if(lr) {dia.self(var.rect[[i+num.xvar]],side=3) } else {dia.self(var.rect[[i+num.xvar]],side=3)} sem[k,1] <- paste(vars[i+num.xvar],"<->",vars[i+num.xvar],sep="") sem[k,2] <- paste("y",i,"e",sep="") k <- k+1 }} } #end of if.null(ymodel) # if(!is.null(Rx)) {#draw the correlations between the x variables # for (i in 2:num.xvar) { # for (j in 1:(i-1)) { # if((!is.numeric(Rx[i,j] ) && ((Rx[i,j] !="0")||(Rx[j,i] !="0")))|| ((is.numeric(Rx[i,j]) && abs(Rx[i,j]) > cut ))) { # if (lr) {if(abs(i-j) < 2) { dia.curve(from=var.rect[[j]]$left,to=var.rect[[i]]$left, labels = Rx[i,j],scale=-3*(i-j)/num.xvar)} else { dia.curve(from=var.rect[[j]]$left,to=var.rect[[i]]$left, labels = Rx[i,j],scale=-3*(i-j)/num.xvar)} # } else { # if(abs(i-j) < 2) { dia.curve(from=var.rect[[j]]$bottom,to=var.rect[[i]]$bottom, labels = Rx[i,j],scale=-3*(i-j)/num.xvar)} else {dia.curve(from=var.rect[[j]]$bottom,to=var.rect[[i]]$bottom, labels = Rx[i,j],scale=-3*(i-j)/num.xvar)} # } # }} # } # # } # if(!is.null(Ry)) {#draw the correlations between the y variables # for (i in 2:num.yvar) { # for (j in 1:(i-1)) { # if((!is.numeric(Ry[i,j] ) && ((Ry[i,j] !="0")||(Ry[j,i] !="0")))|| ((is.numeric(Ry[i,j]) && abs(Ry[i,j]) > cut ))) { # if (lr) {if(abs(i-j) < 2) { dia.curve(from=var.rect[[j+num.xvar]]$right,to=var.rect[[i+num.xvar]]$right, labels = Ry[i,j],scale=3*(i-j)/num.xvar)} else {dia.curve(from=var.rect[[j+num.xvar]]$right,to=var.rect[[i+num.xvar]]$right, labels = Ry[i,j],scale=3*(i-j)/num.xvar)} # } else { # if(abs(i-j) < 2) { dia.curve(from=var.rect[[j+num.xvar]]$bottom,to=var.rect[[i+num.xvar]]$bottom, labels = Ry[i,j],scale=3*(i-j)/num.xvar)} else {dia.curve(from=var.rect[[j+num.xvar]]$bottom,to=var.rect[[i+num.xvar]]$bottom, labels = Ry[i,j],scale=3*(i-j)/num.xvar)} # } # }} # } # # } Phi <- round(Phi,digits) if(!regression) { if(!is.null(Phi)) {if (!is.matrix(Phi)) { if(!is.null(FALSE)) {Phi <- matrix(c(1,0,Phi,1),ncol=2)} else {Phi <- matrix(c(1,Phi,Phi,1),ncol=2)}} if(num.xfactors>1) {for (i in 2:num.xfactors) { #first do the correlations within the f set for (j in 1:(i-1)) { {if((!is.numeric(Phi[i,j] ) && ((Phi[i,j] !="0")||(Phi[j,i] !="0")))|| ((is.numeric(Phi[i,j]) && abs(Phi[i,j]) > cut ))) { if(Phi[i,j] == Phi[j,i] ) { if(lr) {dia.curve(from=fact.rect[[i]]$right,to=fact.rect[[j]]$right, labels = Phi[i,j],scale=2*(i-j)/num.xfactors)} else { dia.curve(from=fact.rect[[i]]$top,to=fact.rect[[j]]$top, labels = Phi[i,j],scale=2*(i-j)/num.xfactors)} sem[k,1] <- paste(fact[i],"<->",fact[j],sep="") sem[k,2] <- paste("rF",i,"F",j,sep="") lavaan[[num.xfactors +num.yfactors +1]] <- paste(fact[i], "~~", fact[j])} else {#directed arrows if(Phi[i,j] !="0") { if(lr) { if(abs(i-j) < 2) {dia.arrow(from=fact.rect[[j]],to=fact.rect[[i]], labels = Phi[i,j],scale=2*(i-j)/num.xfactors)} else { dia.curved.arrow(from=fact.rect[[j]]$right,to=fact.rect[[i]]$right, labels = Phi[i,j],scale=2*(i-j)/num.xfactors)} } else { if(abs(i-j) < 2) { dia.arrow(from=fact.rect[[j]],to=fact.rect[[i]], labels = Phi[i,j],scale=2*(i-j)/num.xfactors)} else { dia.curved.arrow(from=fact.rect[[j]]$top,to=fact.rect[[i]]$top, labels = Phi[i,j],scale=2*(i-j)/num.xfactors)} } sem[k,1] <- paste(fact[j]," ->",fact[i],sep="") sem[k,2] <- paste("rF",j,"F",i,sep="")} else { if(lr) { if(abs(i-j) < 2) {dia.arrow(from=fact.rect[[i]],to=fact.rect[[j]], labels = Phi[j,i],scale=2*(i-j)/num.xfactors)} else { dia.curved.arrow(from=fact.rect[[i]]$right,to=fact.rect[[j]]$right, labels = Phi[j,i],scale=2*(i-j)/num.xfactors)} } else { if(abs(i-j) < 2) { dia.arrow(from=fact.rect[[i]],to=fact.rect[[j]], labels = Phi[j,i],scale=2*(i-j)/num.xfactors)} else { dia.curved.arrow(from=fact.rect[[i]]$top,to=fact.rect[[j]]$top, labels = Phi[j,i],scale=2*(i-j)/num.xfactors)} } sem[k,1] <- paste(fact[i],"<-",fact[j],sep="") lavaan[[num.xfactors +num.yfactors +k]] <- paste(fact[j], "~", fact[i]) sem[k,2] <- paste("rF",i,"F",j,sep="")} } } else { sem[k,1] <- paste(fact[i],"<->",fact[j],sep="") if (is.numeric(Phi[i,j])) {sem[k,2] <- paste("rF",i,"F",j,sep="")} else {sem[k,2] <- Phi[i,j] } } k <- k + 1} } } } #end of correlations within the fx set if(!is.null(ymodel)) { if(!cancor) { for (i in 1:num.xfactors) { for (j in 1:num.yfactors) { if((!is.numeric(Phi[j+num.xfactors,i] ) && (Phi[j+num.xfactors,i] !="0"))|| ((is.numeric(Phi[j+num.xfactors,i]) && abs(Phi[j+num.xfactors,i]) > cut ))) { dia.arrow(from=fact.rect[[i]],to=fact.rect[[j+num.xfactors]],Phi[j+num.xfactors,i],adj=i %% adj +1) sem[k,1] <- paste(fact[i],"->",fact[j+num.xfactors],sep="") lavaan[[num.xfactors +num.yfactors +k]] <- paste(fact[j+num.xfactors], "~", fact[i]) } else { sem[k,1] <- paste(fact[i],"<->",fact[j+num.xfactors],sep="")} if (is.numeric(Phi[j+num.xfactors,i])) {sem[k,2] <- paste("rX",i,"Y",j,sep="")} else {sem[k,2] <- Phi[j+num.xfactors,i] } k <- k + 1 } }} else { #we just plot the canoncial correlations which are just between the factors for(i in 1:num.xfactors) { for (j in 1:num.yfactors) { if(Phi[j,i] > cut) dia.arrow(to=fact.rect[[i]],from=fact.rect[[j+num.xfactors]],Phi[j,i],both=TRUE) }} } } } } if(num.factors > 0 ) { for(f in 1:num.factors) { sem[k,1] <- paste(fact[f],"<->",fact[f],sep="") sem[k,3] <- "1" k <- k+1 } model=sem[1:(k-1),] class(model) <- "mod" #suggested by John Fox to make the output cleaner lavaan <- unlist(lavaan) lavaan <- noquote(lavaan) result <- list(sem=model,lavaan=lavaan) invisible(result) } } #use the esemDiagram function to draw a canonical correlation diagram cancorDiagram <- function(fit, cut=.1,digits=2, nfX = NULL, nfY =NULL,simple=FALSE,e.size=.1,main="Canonical Correlation",...) { esem <- list() if(is.null(nfX)) nfX <- NCOL(fit$Xmat ) if(is.null(nfY)) nfY <- NCOL(fit$Ymat ) esem$loadsX <- fit$Xmat[,1:nfX,drop=FALSE] esem$loadsY <- fit$Ymat[,1:nfY,drop=FALSE] if(is.complex(fit$cancor)) fit$cancor <- Re(fit$cancor) #don't work with imaginary roots esem$Phi <- diag(fit$cancor) # if(is.complex(esem$Phi)) esem$Phi <- Re(esem$Phi) class(esem) <- class(fit) esemDiagram(esem, cut=cut,digits=digits,simple=simple,e.size=e.size, main=main,...) } psych/R/cta.15.R0000644000176200001440000001224114444434513012725 0ustar liggesusers"cta.15" <- function(n=3,t=5000, cues = NULL,stim = NULL, act=NULL, inhibit=NULL, consume = NULL,ten = NULL, type="both",fast=2 ) { #simulation of the CTA reparamaterization of the dynamics of action #revised 6/10/22 to include a stimulation matrix compare <- FALSE if(n > 4){ colours <- rainbow(n)} else {colours <- c("black","blue", "red", "green") } step <- .05 ten.start <- ten act.start <- act tendencies.m <- matrix(NA,ncol=t,nrow=n) actions.m <- matrix(NA,ncol=t,nrow=n) if(is.null(stim)) stim <- diag(1,n) #added 6/10/22 if(is.null(cues)) {cues <- 2^(n-1:n)} if(is.null(inhibit)) {inhibit <- matrix(1,ncol=n,nrow=n) diag(inhibit) <- .05} if(n>1) {colnames(inhibit) <- rownames(inhibit) <- paste("A",1:n,sep="")} if(is.null(consume) ) {consume <- diag(.05,ncol=n,nrow=n) } excite <- diag(step,n) #first run for time= t to find the maximum values to make nice plots as well as to get the summary stats if (is.null(ten.start)) {ten <- rep(0,n)} else {ten <- ten.start} if(is.null(act.start) ) {act <- cues} else {act <- act.start} maxact <- minact <- minten <- maxten <- 0 counts <- rep(0,n) transitions <- matrix(0,ncol=n,nrow=n) frequency <- matrix(0,ncol=n,nrow=n) actions <- tendencies <- rep(0,n) acts <- tends <- rep(0,n) colnames(frequency) <- paste("T",1:n,sep="") rownames(frequency) <- paste("F",1:n,sep="") names(tendencies) <- paste("T",1:n,sep="") names(actions) <- paste("A",1:n,sep="") old.act <- which.max(act) for (i in 1:t) { ten <- cues %*% stim %*% excite + ten - act %*% excite %*% consume #but where are the sensitivities? act <- ten %*% excite + act - act %*% excite %*% inhibit act[act<0] <- 0 tendencies <- tendencies + ten actions <- actions + act maxact <- max(maxact,act) minact <- min(minact,act) maxten <- max(maxten,ten) minten <- min(minten,ten) which.act <- which.max(act) counts[which.act] <- counts[which.act]+1 acts[which.act] <- acts[which.act] + act[which.act] tends[which.act] <- tends[which.act] + ten[which.act] transitions[old.act,which.act] <- transitions[old.act,which.act] + 1 if(old.act!=which.act) { frequency[old.act,which.act] <- frequency[old.act,which.act] + 1 frequency[which.act,which.act] <- frequency[which.act,which.act] +1} old.act <- which.act tendencies.m[,i] <- ten actions.m[,i] <- act } yl <- range(tendencies.m) plot(tendencies.m[1,],ylim=yl,xlab="time",ylab="Tendency",col="black",typ="l",main="Action tendencies over time",lwd=2) for (j in 2:n) {points(tendencies.m[j,],lty=j,col=colours[j],typ="l",lwd=2)} yl <- range(actions.m) plot(actions.m[1,],ylim=yl,xlab="time",ylab="Action Strength",col="black",typ="l",main="Actions over time",lwd=2) for (j in 2:n) {points(actions.m[j,],lty=j,col=colours[j],typ="l",lwd=2)} if(FALSE){ #now do various types of plots, depending upon the type of plot desired plots <- 1 action <- FALSE #state diagrams plot two tendencies agaist each other over time if (type!="none") {if (type=="state") { op <- par(mfrow=c(1,1)) if (is.null(ten.start)) {ten <- rep(0,n)} else {ten <- ten.start} if(is.null(act.start) ) {act <- cues} else {act <- act.start} plot(ten[1],ten[2],xlim=c(minten,maxten),ylim=c(minten,maxten),col="black", main="State diagram",xlab="Tendency 1", ylab="Tendency 2") for (i in 1:t) { ten <- cues %*% excite + ten - act %*% excite %*% consume act <- ten %*% excite + act - act %*% excite %*% inhibit act[act<0] <- 0 if(!(i %% fast)) points(ten[1],ten[2],col="black",pch=20,cex=.2) } } else { #the basic default is to plot action tendencies and actions in a two up graph if(type=="both") {if(compare) {op <- par(mfrow=c(2,2))} else {op <- par(mfrow=c(2,1))} plots <- 2 } else {op <- par(mfrow=c(1,1))} if (type=="action") {action <- TRUE} else {if(type=="tend" ) action <- FALSE} for (k in 1:plots) { if (is.null(ten.start)) {ten <- rep(0,n)} else {ten <- ten.start} if(is.null(act.start) ) {act <- cues} else {act <- act.start} if(action ) plot(rep(1,n),act,xlim=c(0,t),ylim=c(minact,maxact),xlab="time",ylab="action", main="Actions over time") else plot(rep(1,n),ten,xlim=c(0,t),ylim=c(minten,maxten),xlab="time",ylab="action tendency",main="Action Tendencies over time") for (i in 1:t) { ten <- cues %*% excite + ten - act %*% excite %*% consume act <- ten %*% excite + act - act %*% excite %*% inhibit act[act<0] <- 0 if(!(i %% fast) ) {if( action) points(rep(i,n),act,col=colours,cex=.2) else points(rep(i,n),ten,col=colours,cex=.2) }} action <- TRUE} } } } # end of if FALSE acts <- acts/counts tends <- tends/counts cta.df <- data.frame(cues=cues,time=round(counts/t,2),frequency = rowSums(frequency),tendencies = round(t(tendencies/t)),actions = round(acts)) results <- list(cues=cues,cta=cta.df,inihibition=inhibit,time = counts/t,frequency=frequency,tendencies = tendencies/t,actions = actions/t) class(results) <- c("psych","cta") return(results) } "print_psych.cta" <- function(x,digits=2,all=NULL) { if(all) {unclass(x) print(x)} else { cat("\n Cues Tendency Action model\n") cat("Cue strength = ",x$cues) cat("\nTime spent = ",round(x$time,digits)) cat("\nAverage Tendency = ",round(x$tendencies)) cat("\nAverage Action strength = ",round(x$actions)) }} psych/R/direct.sl.R0000644000176200001440000000375213571764641013640 0ustar liggesusers#Added June 20, 2018 to try to do Neils Waller's direct Schmid Leiman Procrustes <-function(L, Target=NULL){#Adapted from Niels Waller (2017) if(is.null(Target)) Target <- factor2cluster(L) tM1M2 <- t(Target) %*% L svdtM1M2 <- svd(tM1M2) P <- svdtM1M2$u Q <- svdtM1M2$v T <- Q%*%t(P) ## Orthogonally rotate L to Target return(list(loadings = L %*%T,rotation = T)) } #allowing to specify a number of rotations oblique.rotations <- function(rotate="oblimin",loadings,...){ if (rotate =="oblimin"| rotate=="quartimin" | rotate== "simplimax" | rotate =="geominQ" | rotate =="bentlerQ" |rotate == "targetQ" ) { if (!requireNamespace('GPArotation')) {warning("I am sorry, to do these rotations requires the GPArotation package to be installed") Phi <- NULL} else { ob <- try(do.call(getFromNamespace(rotate,'GPArotation'),list(loadings,...))) if(inherits(ob,as.character("try-error"))) {warning("The requested transformaton failed, Promax was used instead as an oblique transformation") ob <- Promax(loadings)} loadings <- ob$loadings Phi <- ob$Phi rot.mat <- t(solve(ob$Th))} } return(list(loadings=loadings,Phi=Phi)) } #direct Schmid Leiman adapted from Waller (2017) direct.sl <- function(x,nfactors=3,rotate="oblimin",cut=.3,plot=TRUE){ nvar <- ncol(x) f <- fa(x,nfactors=nfactors,rotate ='none') #unrotated solution #construct the target from the rotated solution f.obl <- oblique.rotations(rotate=rotate,loadings = f$loadings)$loadings targ <- factor2cluster(f.obl,cut=cut) #Waller adjustments to target and factor model targ <- cbind(g=rep(1,nvar),targ) f0 <- cbind(rep(0,nvar),f$loadings) direct <- Procrustes(f0,targ)$loadings #The Waller Procrustes solution colnames(direct) <- colnames(targ) #put some labels in fa.diagram(direct,g=TRUE,simple=FALSE,cut=cut) return(direct) } psych/R/error.bars.R0000644000176200001440000001703114055264322014010 0ustar liggesusers"error.bars" <- function (x,stats=NULL,data=NULL,group=NULL,ylab ="Dependent Variable",xlab="Independent Variable", main=NULL, eyes=TRUE, ylim=NULL, xlim=NULL, alpha=.05, sd=FALSE, labels=NULL, pos=NULL,arrow.len=.05,arrow.col="black",add=FALSE,bars=FALSE,within=FALSE,col=c("black","blue","red"),density=-10,...) # x data frame with { if(!missing(x) && (inherits(x, "formula"))) {#if(!is.null(data)) #if there is a grouping variable, call error.bars.by # cat("\nFormula input detected, calling error.bars.by") error.bars.by(x,data=data,x.cat=TRUE,ylab =ylab,xlab=xlab,main=main,ylim= ylim, eyes=eyes,alpha=alpha,sd=sd,labels=labels, v.labels=NULL, pos=pos, arrow.len=arrow.len,add=add,bars=bars,within=within,colors=col, legend=0,density=density,stats=stats,...) } else { if(!missing(group)) { error.bars.by(x,group=group,x.cat=TRUE,ylab =ylab,xlab=xlab,main=main,ylim= ylim, eyes=eyes,alpha=.05,sd=sd,labels=labels, v.labels=NULL, pos=pos, arrow.len=arrow.len, add=add,bars=bars, within=within, colors=col, legend=0,density=density,stats=stats,...)} else { #no grouping variable, this is the normal case SCALE=.5 #scale the width of the cats eyes if(is.null(stats)) { x.stats <- describe(x) if (within) { x.smc <- smc(x,covar=TRUE) x.stats$se <- sqrt((x.stats$sd^2 - x.smc)/x.stats$n) } if(is.null(dim(x))) { z <- 1} else {z <- dim(x)[2]} #if we just have one variable names <- colnames(x) } else { x.stats <- stats z <- dim(x.stats)[1] names <- rownames(stats) if(is.null(x.stats$se) ) x.stats$se <- x.stats$sd/sqrt(x.stats$n-1) } min.x <- min(x.stats$mean,na.rm=TRUE) max.x <- max(x.stats$mean,na.rm=TRUE) max.se <- max(x.stats$se,na.rm=TRUE) {if(!sd) { if(is.null(stats)) {ci <- qt(1-alpha/2,x.stats$n-1) } else {ci <- rep(1,z) }} else {ci <- sqrt(x.stats$n) max.se <- max(ci * x.stats$se,na.rm=TRUE)} } if(is.null(main)) {if(!sd) { main = paste((1-alpha)*100,"% confidence limits",sep="") } else {main= paste("Means and standard deviations")} } if(is.null(ylim)) {if(is.na(max.x) | is.na(max.se) | is.na(min.x) | is.infinite(max.x)| is.infinite(min.x) | is.infinite(max.se)) { ylim=c(0,1)} else { if(bars) { ylim=c(min(0,min.x- 3*max.se),max.x+3*max.se) } else { ylim=c(min.x - 3*max.se,max.x+3*max.se) }} } if(bars) {mp =barplot(x.stats$mean,ylim=ylim,xlab=xlab,ylab=ylab,main=main,col=col,...) axis(1,mp[1:z],names) axis(2) box() } else { if(!add){ if(missing(xlim)) {if (is.null(x.stats$values)) {xlim<- c(.5,z+.5) } else {xlim <- c(min(x.stats$values)-.5,max(x.stats$values)+.5)}} if(is.null(x.stats$values)) { plot(x.stats$mean,ylim=ylim,xlab=xlab,ylab=ylab,xlim=xlim,axes=FALSE,main=main,...) axis(1,1:z,names,...) axis(2) box()} else { plot(x.stats$values,x.stats$mean,ylim=ylim,xlim=xlim,xlab=xlab,ylab=ylab,main=main,...) } } else {points(x.stats$mean,...) } } #end of if(bars) if(!is.null(labels)) {lab <- labels} else {lab <- paste("V",1:z,sep="")} if (length(pos)==0) {locate <- rep(1,z)} else {locate <- pos} if (length(labels)==0) lab <- rep("",z) else lab <-labels for(s in 1:z) { #this allows us to address each one separately --probably slower but avoids complaints of 0 length if(bars) {arrows(mp[s],x.stats$mean[s]-ci[s]* x.stats$se[s],mp[s],x.stats$mean[s]+ci[s]* x.stats$se[s],length=arrow.len, angle = 90, code=3,col = par("fg"), lty = NULL, lwd = par("lwd"), xpd = NULL)} else { if( (!is.na(x.stats$se[s])) && (x.stats$se[s] > 0)) { if(is.null(x.stats$values)) { arrows(s,x.stats$mean[s]-ci[s]* x.stats$se[s],s,x.stats$mean[s]+ci[s]* x.stats$se[s],length=arrow.len, angle = 90, code=3,col=arrow.col) } else { arrows(x.stats$values,x.stats$mean[s]-ci[s]* x.stats$se[s],x.stats$values,x.stats$mean[s]+ci[s]* x.stats$se[s],length=arrow.len, angle = 90, code=3, col=arrow.col)} } } } if(eyes) { if(length(col) == 1) col <- rep(col,z) ln <- seq(-3,3,.1) rev <- (length(ln):1) for (s in 1:z){ if(!is.null(x.stats$values[s] )) { catseyes(x=x.stats$values[s],y=x.stats$mean[s],se=x.stats$se[s],n=x.stats$n[s],alpha=alpha,density=density,col=col[s])} else { catseyes(x=s,y=x.stats$mean[s],se=x.stats$se[s],n=x.stats$n[s],alpha=alpha,density=density,col=col[s])}} } } } } #corrected July 25, 2009 to fix bug reported by Junqian Gordon Xu and then modified to be cleaner code #modified Sept 5, 2009 to handle data with all missing values (why we would want to that is a mystery, but was requested by Junqian Gordon Xu.) #April 5, 2010: the within parameter was added to allow for error bars in repeated measure designs #modified June, 2010 to allow for easier use of stats input #modified June 15, 2010 to allow color bars to match color of lines and to have standard deviations as an option #modified Sept 11, 2013 to pass n -1 to the qt function (reported by Trevor Dodds) #modified March 10, 2014 add the capability to draw "cats eyes" #corrected April 22, 2016 to correctly handle the "stats" option with cats eyes #added the se>0 test to not choke on drawing empty cats eyes "catseyes" <- function(x,y,se,n,alpha=alpha,density=density,col=col) { SCALE=.7 if(!is.na(se) &&(!is.null(se))&& ( se >0)) { #skip everything if no se #don't choke with no standard errors, just don't draw anything ln <- seq(-3,3,.1) rev <- (length(ln):1) if(is.null(n) || is.na(n)) {norm <- dnorm(ln) qt <- qnorm(alpha/2) clim <- qnorm(alpha/2)} else {if(n >1) { norm <- dt(ln,n-1) clim <- qt(alpha/2,n-1)}} norm <- c(norm,-norm[rev]) ln <- seq(-3,3,.1) cln <- seq(clim,-clim,.01) cnorm <- dnorm(cln) cnorm <- c(0,cnorm,0,-cnorm,0) #this closes the probability interval polygon(norm*SCALE+x,c(ln,ln[rev])*se+y) polygon(cnorm*SCALE+x,c(clim,cln,-clim,-cln,clim)*se+y,density=density,col=col)} } #added May 30, 2016 error.bars.tab <- function(t,way="columns",raw=FALSE,col=c('blue','red'),...) { rnames <- rownames(t) cnames <- colnames(t) t <- as.matrix(t) switch(way, columns = {p <- t(t(t)/colSums(t)) #convert to probabilities if(!raw) {standard.error <- t(sqrt(t(p * (1-p))/colSums(t)))} else {standard.error <- t(sqrt(t(p * (1-p))*colSums(t)))}}, rows = { t <- as.matrix(t) p <- t/rowSums(t) #convert to probabilities if(!raw) {standard.error <-sqrt(p * (1-p)/rowSums(t)) } else {standard.error <-sqrt(p * (1-p)* rowSums(t))}}, both = {p <- t(t(t)/sum(t)) #convert to probabilities if(!raw) {standard.error <- t(sqrt(t(p * (1-p))/sum(t)))} else{standard.error <- t(sqrt(t(p * (1-p))*sum(t)))} }) colnames(p) <-colnames(t) rownames(p) <- rownames(t) nv <- ncol(t) ng <- nrow(t) if(raw) {p <- t} stats <- data.frame(mean=as.vector(p), se=as.vector(standard.error)) rownames(stats) <- paste(rnames,rep(cnames,each=ng)) space <- rep(.1,nv*ng) for(i in 1:(nv-1)) {space[ng*i + 1] <- 1} error.bars(stats=stats,bars=TRUE,space=space, density=c(20,-10,20,-10),col=col,...) invisible(list(p=p,stats=stats)) }psych/R/makerepeated.r0000644000176200001440000000437713227171417014433 0ustar liggesusers makerepeated <- function(f=NULL, key="STATE",filter=NULL) { #go get the files we want if(is.null(f)) { fn <- file.choose() } else { fn <- f} #find a file in the directory you want dir <- dirname(fn) #the directory where the file was found files.list <- list.files(dir) select <- grep(key,files.list,ignore.case=TRUE) #these are the ones that match key selected <- files.list[select] if(!is.null(filter)) {selected <- files.list[select] select <- grep(filter,selected,ignore.case=TRUE,invert=TRUE) selected <- selected[select]} #first figure out the n.var for this set of files for (i in 1:length(selected)) { file <- selected[i] temp <- strsplit(file,'[- ]')[[1]] #strip out the experiment names from the other text study <- temp[1] #the study name time <- grep('[1234567890]',temp) #now, it is possible that the key name is embedded in the time #time <- gsub(key,"",temp[time],ignore.case=TRUE) #time <- gsub('[.]',"",temp[time],ignore.case=TRUE) if(length(time)>0 ){ time <- temp[time] } else {time <- 1} #we use scan rather than read.fwf because we have different widths for each file and we don't the format path <- file.path(dir,file) #this functionally reads fwf and converts to numeric cat("\nFile read=",file) temp <- scan(path,what="raw") periods <- grep("[.]",temp) if(length(periods) > 0) {temp <- strsplit(temp,"[.]") #we need to strip out periods temp <- unlist(temp)} #how many records per subject (some files have id field followed by a code field, followed by the data if(nchar(temp[2]) < nchar(temp[3])) {nlines <- 3} else {nlines <-2} n.obs <- length(temp)/nlines n.var <- nchar(temp[nlines]) new.data <- matrix(NA,nrow=n.obs,ncol=n.var +1) for( j in 1:n.obs) { #now, form a matrix of characters with the data new.data[j,1] <- temp[1+(j-1)*nlines] temp.split <- strsplit(temp[j*nlines],"") new.data[j,2:(n.var+1)] <- temp.split[[1]] #this gets the id field } new.data <- matrix(as.numeric(new.data),ncol=n.var + 1) colnames(new.data) <- c("id",paste0("V",1:n.var) ) new.data.df <- data.frame(study=study,time=time,new.data) if(i ==1) {big.data <- new.data.df} else { if(NCOL(new.data.df) != NCOL(big.data)) {cat("\nOoops file sizes don't match", file, "\n")} else { big.data <- rbind(big.data,new.data.df)} } } #end of for i loop return(big.data) } psych/R/mat.sort.R0000644000176200001440000000247113377010254013501 0ustar liggesusers"mat.sort" <- "matSort" <- function(m,f=NULL) { if (is.null(f) ) {f <- fa(m) } if(is.list(f) && (!is.null(loadings(f)))) {load <- loadings(f)} else {load <- f} load <- as.matrix(load) nitems <- NROW(load) nfactors <-NCOL(load) loads <- data.frame(item=seq(1:nitems),cluster=rep(0,nitems),unclass(load)) #first sort them into clusters #first find the maximum for each row and assign it to that cluster loads$cluster <- apply(abs(load),1,which.max) ord <- sort(loads$cluster,index.return=TRUE) loads[1:nitems,] <- loads[ord$ix,] rownames(loads)[1:nitems] <- rownames(loads)[ord$ix] #now sort column wise #now sort the loadings that have their highest loading on each cluster items <- table(loads$cluster) #how many items are in each cluster? first <- 1 item <- loads$item for (i in 1:length(items)) { if(items[i] > 0 ) { last <- first + items[i]- 1 ord <- sort(abs(loads[first:last,i+2]),decreasing=TRUE,index.return=TRUE) loads[first:last,3:(nfactors+2)] <- load[item[ord$ix+first-1],] loads[first:last,1] <- item[ord$ix+first-1] rownames(loads)[first:last] <- rownames(loads)[ord$ix+first-1] first <- first + items[i] } } item.order <- loads[,1] m <- m[item.order,item.order] return(m) } psych/R/SD.R0000644000176200001440000000046311645327150012241 0ustar liggesusers"SD" <- function (x, na.rm = TRUE) { if (is.matrix(x)) apply(x, 2, SD, na.rm = na.rm) else if (is.vector(x)) sqrt(var(x, na.rm = na.rm,use="pair")) else if (is.data.frame(x)) apply(x,2, SD, na.rm = na.rm) else sqrt(var(as.vector(x), na.rm = na.rm,use="pair")) } psych/R/bassAckward.R0000644000176200001440000001224114444460423014156 0ustar liggesusers"bassAckward" <- function(r,nfactors=1,fm="minres",rotate="oblimin",scores="tenBerge",adjust=TRUE,plot=TRUE,cut=.3,use="pairwise", cor="cor",weight=NULL,correct=.5,...) { cl <- match.call() #find r if data matrix if (!isCorrelation(r)) { matrix.input <- FALSE #return the correlation matrix in this case n.obs <- dim(r)[1] cnames <- colnames(r) # if given a rectangular matrix, then find the correlation or covariance #multiple ways of find correlations or covariances #added the weights option to tet, poly, tetrachoric, and polychoric June 27, 2018 switch(cor, cor = {r <- cor(r,use=use)}, cov = {r <- cov(r,use=use) covar <- TRUE}, wtd = { r <- cor.wt(r,w=weight)$r}, tet = {r <- tetrachoric(r,correct=correct,weight=weight)$rho}, poly = {r <- polychoric(r,correct=correct,weight=weight)$rho}, tetrachoric = {r <- tetrachoric(r,correct=correct,weight=weight)$rho}, polychoric = {r <- polychoric(r,correct=correct,weight=weight)$rho}, mixed = {r <- mixedCor(r,use=use,correct=correct)$rho}, Yuleb = {r <- YuleCor(r,,bonett=TRUE)$rho}, YuleQ = {r <- YuleCor(r,1)$rho}, YuleY = {r <- YuleCor(r,.5)$rho } ) colnames(r) <- rownames(r) <- cnames } r.n <- list() fa <- list() Phi <- list() fa.Vaccounted <- list() num.fac <- length(nfactors) if (num.fac == 1L) { num.fac <- nfactors nfactors <- 1:num.fac} if(fm =="pca") { #do the conventional pca bass-akwards approach with components pc <- pca(r,nfactors[1],rotate=rotate) #pc <- pca(r) pc.weights <- pc$weights colnames(pc.weights) <- paste0("C",1:nfactors[1]) for(nf in 1:num.fac) { fn <- pca(r,nfactors[nf],rotate=rotate) colnames(fn$loadings) <- paste0("C",1:ncol(fn$loadings)) fa[[nf]] <- fn$loadings Phi[[nf]] <- fn$Phi fa.Vaccounted[[nf]] <- fn$Vaccounted[2,] pcn.weights <- fn$weights colnames(pcn.weights) <- paste0("C",1:ncol(pcn.weights)) colnames(pc$weights) <- paste0("C",1:ncol(pc$weights)) r.n[[nf]] <- t(pcn.weights) %*% r %*% pc$weights pc <- fn } } else { #factor analyze #two cases: #normal case is do a regular factor analysis #but, if rotate = "schmid" we do a schmid leiman rotation if(rotate != "schmid") { f <- fa(r,nfactors[1],fm=fm,rotate=rotate,scores=scores) for(nf in 1:num.fac) { fn <- fa(r,nfactors[nf],fm=fm,rotate=rotate,scores=scores) colnames(fn$loadings) <- paste0("F",1:ncol(fn$loadings)) fa[[nf]] <- fn$loadings Phi[[nf]] <- fn$Phi fn.weights <- fn$weights fa.Vaccounted[[nf]] <- fn$Vaccounted[2,] colnames(fn.weights) <- paste0("F",1:ncol(fn$weights)) colnames(f$weights) <- paste0("F",1:ncol(f$weights)) rf <- t(fn.weights) %*% r %*% f$weights #need to adjust by variances if not using tenBerge rs1 <- diag(t(f$weights) %*% r %*% f$weights ) rs2 <- diag(t(fn$weights) %*% r %*% fn$weights ) if(adjust) rf <- (diag(1/sqrt(rs2)) %*% rf %*% diag(1/sqrt(rs1))) colnames(rf) <- paste0("F",1:ncol(rf)) rownames(rf) <- paste0("F",1:nrow(rf)) r.n[[nf]] <- rf f <- fn} } else { #do schmid leiman extractions f <- schmid(r,nfactors[1],fm=fm) #the top level f$weights <- solve(r,f$sl[,1:(nfactors[1]+1)]) for(nf in 1:num.fac) { fn <- schmid(r,nfactors[nf],fm=fm) fn$loadings <- fn$sl[,1:(nfactors[nf] + 1)] colnames(fn$loadings) <- c("g", paste0("F*",1:(ncol(fn$loadings)-1))) fa[[nf]] <- fn$loadings fn$weights <- solve(r,fn$loadings) colnames(fn$weights) <- c("g",paste0("F*",1:(ncol(fn$weights)-1))) colnames(f$weights) <- c("g", paste0("F*",1:(ncol(f$weights)-1))) Phi <- NULL rf <- t(fn$weights) %*% r %*% f$weights #need to adjust by variances if not using tenBerge rs1 <- diag(t(f$weights) %*% r %*% f$weights ) rs2 <- diag(t(fn$weights) %*% r %*% fn$weights ) if(adjust) rf <- (diag(1/sqrt(rs2)) %*% rf %*% diag(1/sqrt(rs1))) colnames(rf) <- c("g",paste0("F*",1:(ncol(rf)-1))) rownames(rf) <- c("g", paste0("F*",1:(nrow(rf)-1))) r.n[[nf]] <- rf f <- fn } } } #Now summarize the results sumlist <- sumnames <- labels <- list() fa.loading.phi <-list() for(f in 1:nf) { sumlist[[f]] <- apply(r.n[[f]],2,function(x) {which(max(abs(x))==abs(x))}) sumnames[[f]] <- rownames(r.n[[f]])[sumlist[[f]]] labels[[f]] <- rownames(r.n[[f]]) if(length(Phi)>0) { #added this check March 3, 2020 fa.loading.phi [[f]] <-list(loadings = fa[[f]],Phi=Phi[[f]])} else { fa.loading.phi [[f]] <-list(loadings = fa[[f]],Phi=NA) } class(fa.loading.phi[[f]]) <- cs(psych,fa) } labels[[nf+1]] <- rownames(fn$loadings) r.n[[nf+1]] <- fn$loadings result <- list(Call=cl,fm=fm,bass.ack= r.n,Phi=Phi,r = r,summary=sumlist,sumnames=sumnames,labels =labels,fa=fa.loading.phi, fa.vac=fa.Vaccounted) class(result) <- c("psych","bassAck") if(plot) bassAckward.diagram(result,cut=cut,...) return(result) } print_psych.back<- function(x,digits=2 ,short=TRUE) { cat("\nCall: ") print(x$Call) nf <- length(x$bass.ack)-1 for (f in 1:nf) { cat("\n",f, x$sumnames[[f]])} if(!short) { for (f in 1:nf) { cat("\nFactor correlations\n ") print(round(x$bass.ack[[f]],digits=digits))} } } summary_psych.back <- function(x,digits=2) { cat("\nCall: ") print(x$Call) nf <- length(x$bass.ack)-1 for (f in 1:nf) { cat("\nFactor correlations\n ") print(round(x$bass.ack[[f]],digits=digits)) } } psych/R/Pinv.R0000644000176200001440000000043113452420457012644 0ustar liggesusers"Pinv" <- function(X,tol = sqrt(.Machine$double.eps)) { svdX <- svd(X) p <- svdX$d > max(tol * svdX$d[1],0 ) if(all(p)){ Pinv <- svdX$v %*% (1/svdX$d * t(svdX$u)) } else { Pinv <- svdX$v[,p,drop=FALSE] %*% (1/svdX$d[p] * t(svdX$u[,p,drop=FALSE])) } return(Pinv) }psych/R/parallel.irt.r0000644000176200001440000004230013574307730014365 0ustar liggesusers#revised August 31, 2012 to allow for estimation of all 0s or all 1s #modified March 10, 2016 to allow for quasi Bayesian estimates using normal theory #which doesn't seem to do what I want to do, so we are not doing it #June 30-July 7 , 2016 Trying to make it faster by parallelizing the code and #reducing the number of items to score when using keys. #uses local minima -- problematic for small number of items #corrected various problems with finding total (sum) scores 7/4/16 #starting to make parallel for speed #seems to result in at least a factor of 2 improvement #the function to do 2 parameter dichotomous IRT "score.irt.2" <- function(stats,items,keys=NULL,cut=.3,bounds=c(-4,4),mod="logistic") { #find the person parameters in a 2 parameter model we use deltas and betas from irt.discrim and irt.person.rasch #find the person parameter #This does the normal fit #has several internal functions irt.2par.norm <- function(x,delta,beta,scores) { fit <- -1*(log(scores*(1-pnorm(beta*(delta-x))) + (1-scores)*(1-pnorm(beta*(x-delta))))) mean(fit,na.rm=TRUE) } #This does the logistic fit irt.2par <- function(x,delta,beta,scores) { fit <- -1*(log(scores/(1+exp(beta*(delta-x))) + (1-scores)/(1+exp(beta*(x-delta))))) mean(fit,na.rm=TRUE) } ### #this is the the one to use when parallelized bigFunction <- function(f,n.obs,stats,items,keys=NULL,cut=.3,bounds=c(-5,5),mod="logistic") { nf <- length(stats$difficulty) diff <- stats$difficulty[[f]] cat <- dim(diff)[2] if(nf < 2) {#discrim <- drop(stats$discrimination) discrim <- stats$discrimination # although I need to check this with keys if(!is.null(keys)) {discrim <- discrim * abs(keys)} } else {discrim <- stats$discrimination[,f] if(!is.null(keys)) {discrim <- discrim * abs(keys[,f]) }} ### fit <- rep(NA,n.obs) theta <- rep(NA,n.obs) if(is.null(keys)) {#drop the items with discrim < cut items.f <- items[,(abs(discrim[,f]) > cut) ,drop=FALSE] #get rid of the those items that are not being processed for this factor diffi.f <- diff[(abs(discrim[,f]) > 0)] #and the redundant diffi discrim.f <- discrim[(abs(discrim[,f]) > cut),drop=FALSE ] #and get rid of the unnecessary discrim values } else { #the case of scoring with a keys vector items.f <- items[,(abs(keys[,f]) > 0) ,drop=FALSE] #get rid of the those items that are not being processed for this factor discrim.f <- discrim[(abs(keys[,f]) > 0),drop=FALSE ] #and get rid of the unnecessary discrim values diffi.f <- diff[(abs(keys[,f]) > 0)] #and the redundant diffi } diffi.vect <- as.vector(t(diffi.f)) #discrim.F.vect <- rep(discrim.f,each=cat) #discrim.f <- discrim[(abs(discrim > cut)),drop=FALSE] discrim.F.vect <- as.vector(t(discrim.f)) if(is.matrix(discrim)) discrim.F.vect <- drop(discrim.F.vect) total <- rowMeans(t(t(items.f)*sign(discrim.F.vect)),na.rm=TRUE) count <- rowSums(!is.na(items.f)) #We can speed this up somewhat if we don't try to fit items with 0 discrim (i.e., items that do not load on the factor or are not keyed) #do it for all subject for this factor #now, lets parallelize this one as well for (i in 1:n.obs) { #First we consider the case of all right or all wrong if (count[i] > 0) {if((sum(items.f[i,],na.rm=TRUE) ==0 ) | (prod(items.f[i,],na.rm=TRUE) == 1 )) { if(sum(items.f[i,],na.rm=TRUE) ==0 ) { if(mod=="logistic") {p <- log(1-(1-items.f[i,])/(1+exp(discrim.f*(diffi.f))) )} else { #logistic p <- log(1-(pnorm(items.f[i,]*discrim.f*diffi.f))) } #normals pall <- exp(sum(p,na.rm=TRUE)) # theta[i] <- qnorm(pnorm(qnorm(pall))/2) #the z value of 1/2 the quantile value of pall theta[i] <- qnorm(pnorm(qnorm(pall))) #the z value of the quantile value of pall fit[i] <- 0 # cat ("\nThe case of all wrong",i,theta[i]) } else { #the case of all right if(mod == "logistic") { p <- log((items.f[i,])/(1+exp(discrim.f*(diffi.f))) )} else { p <- log((items.f[i,])*(1 - pnorm(1- discrim.f*(diffi.f)) )) } pall <- exp(sum(p,na.rm=TRUE)) # theta[i] <- qnorm(1-pnorm(qnorm(pall))/2) #the z value of 1/2 the quantile value of pall theta[i] <- qnorm(1-pnorm(qnorm(pall))) #or, perhaps just hte z value of the quantile value of pall fit[i] <- 0 } } else { #cat("the normal case",i ) if(mod=="logistic") { myfit <- optimize(irt.2par,bounds,beta=discrim.f,delta=diffi.f,scores=items.f[i,]) #how to do an apply? } else {myfit <- optimize(irt.2par.norm,bounds,beta=discrim.f,delta=diffi.f,scores=items.f[i,])} #do a normal fit function theta[i] <- myfit$minimum fit[i] <- myfit$objective #fit of optimizing program }} else {#cat("\nno items",i) theta[i] <- NA fit[i] <- NA } #end if count ... else } #end subject loop theta [theta < bounds[1]] <- bounds[1] theta[theta > bounds[2]] <- bounds[2] # if((!is.null(keys)) & (all(keys[,f] == -1) || (sign(cor(discrim,keys[,f],use="pairwise")) < 0) )) {theta <- -theta #if((!is.null(keys)) & (all(keys[,f] == -1) )) {theta <- -theta # total <- -total} nf <- length(stats$difficulty) n.obs <- dim(items)[1] nvar <- dim(items)[2] # scores <- matrix(NaN,nrow=n.obs,ncol=nf*3) scores <- list(nf*3) scores <- list(theta,total,fit) return(scores) } # end of bigFunction #we now start score.irt.2 proper #this finds scores using multiple cores if they are available nf <- length(stats$difficulty) n.obs <- dim(items)[1] #nvar <- dim(items)[2] # if(nf < 2) {#discrim <- drop(stats$discrimination) # discrim <- stats$discrimination # if(!is.null(keys)) {discrim <- discrim * abs(keys) } # } else {discrim <- stats$discrimination[,f] # if(!is.null(keys)) {discrim <- discrim * abs(keys[,f]) # }} #{for (f in 1:nf) { #note that only items that load on this factor need to be considered #discrim <- stats$discrimination[,f] # if(!is.null(keys)) {discrim <- discrim * abs(keys[,f]) # } #use mapply for debugging, mcmapply for parallel processing scores <- mapply(bigFunction,c(1:nf),MoreArgs=list(n.obs=n.obs,items=items,stats=stats,keys=keys, cut=cut, bounds=bounds, mod=mod)) nf <- length(stats$difficulty) scores <- matrix(unlist(scores),ncol=nf*3) scores <- scores[,c(seq(1,nf*3,3),seq(2,nf*3+1,3),seq(3,nf*3 +2,3))] colnames(scores) <- paste(rep(c("theta","total","fit"),each=nf),1:nf,sep="") return(scores) }#end of score.irt.2 ############# "score.irt.poly" <- function(stats,items,keys=NULL,cut=.3,bounds=c(-4,4),mod="logistic") { #find the person parameters in a 2 parameter model we use deltas and betas from irt.discrim and irt.person.rasch #find the person parameter #created July 4, 2011 irt.2par.poly <- function(x,delta,beta,scores) { fit <- -1*(log(scores/(1+exp(beta*(delta-x))) + (1-scores)/(1+exp(beta*(x-delta))))) mean(fit,na.rm=TRUE) } ####The function that is parallelized big.poly <- function(f,n.obs,stats,items,keys=NULL,cut=.3,bounds=c(-5,5),mod="logistic") { nf <- ncol(stats$discrimination) #for (f in 1:nf) { #do it for every factor/scale diff <- stats$difficulty[[f]] if(nf < 2) {discrim <- stats$discrimination if(!is.null(keys)) {discrim <- discrim * abs(keys) } } else {discrim <- stats$discrimination[,f] if(!is.null(keys)) {discrim <- discrim * abs(keys[,f]) } } cat <- dim(diff)[2] fit <- rep(NA,n.obs) theta <- rep(NA,n.obs) item.f <- t(items) item.f[abs(discrim) < cut] <- NA #this does not change the item, just the temp version of the item item.f <- t(item.f) ### item.f <- item.f[,(abs(keys[,f])>0) ,drop=FALSE] #get rid of the those items that are not being processed for this factor discrim.f <- discrim[(abs(keys[,f]) >0),drop=FALSE ] #and get rid of the unnecessary discrim values #diffi.f <- diff[(abs(keys[,f]) > 0),drop=FALSE] #and the redundant diffi diffi.f <- diff[(abs(keys[,f])>0),byrows=TRUE] #diffi.vect <- as.vector(t(diffi.f)) #diffi.vect <- as.vector(diffi.f) diffi.vect <- as.vector(t(diff[(abs(keys[,f])>0),byrows=TRUE])) discrim.F.vect <- rep(discrim.f,each=cat) ## notice that this is vectorized and does it all subjects #seem to have solved the problem of missing items which are reversed. total <- rowMeans(t(t(item.f )* as.vector(sign(discrim.f))),na.rm=TRUE) #fixed 11/11/11 to be as.vector # total.positive <- rowMeans(t(t(item.f)* as.vector(sign(discrim.f) > 0)),na.rm=TRUE) # total.negative <- rowMeans(t(t(item.f)* as.vector(sign(discrim.f) < 0)),na.rm=TRUE) ## num.keyed <- rowSums(!is.na(item.f)) num.reversed <- rowSums(!is.na(item.f[,discrim.f <0,drop=FALSE])) total <- total + num.reversed * (max.item- min.item+1)/num.keyed + min.item count <- rowSums(!is.na(item.f)) #but now, we need to the next step one at a time (I think) for (subj in 1:n.obs) { if (count[subj]>0) { newscore <- NULL score <- item.f[subj,] #just the items to be scored for (i in 1:ncol(item.f)) { #Treat the items as a series of 1 or 0 responses if(is.na(score[i])) {newscore <- c(newscore,rep(NA,cat)) } else { if(score[i] == cat) {newscore <- c(newscore,rep(1,score[i])) } else { newscore <- c(newscore,rep(1,score[i]),rep(0,cat-score[i])) }} } #check for all lowest responses or all highest responses if((sum(newscore,na.rm=TRUE) ==0 ) | (prod(newscore,na.rm=TRUE) == 1) | (total[subj]==min.item) | (total [subj]== (max.item+min.item)) ) { if((sum(newscore,na.rm=TRUE) ==0) | (total[subj]== min.item)) { if(mod=="logistic") {p <- log(1-(1-newscore)/(1+exp(abs(discrim.f)*(diffi.f))) )} else { #logistic p <- log(1-(pnorm(newscore*abs(discrim.f)*diffi.f))) } #normals pall <- exp(sum(p,na.rm=TRUE)) # theta[i] <- qnorm(pnorm(qnorm(pall))/2) #the z value of 1/2 the quantile value of pall theta[i] <- qnorm(pnorm(qnorm(pall))) #just the z value of the quantile of pall fit[i] <- 0 # cat ("\nThe case of all wrong",i,theta[i]) } else { #the case of all right if(mod == "logistic") { p <- log(1/(1+exp(abs(discrim.f)*(diffi.f))) )} else { p <- log((1)*(1 - pnorm(1- abs(discrim.f)*(diffi.f)) )) } pall <- exp(sum(p,na.rm=TRUE)) theta[subj] <- qnorm(1-pnorm(qnorm(pall))) #the z value of the quantile value of pall fit[subj] <- 0 } } else { myfit <- optimize(irt.2par.poly,bounds,beta=discrim.F.vect,delta=diffi.vect,scores=newscore) theta[subj] <- myfit$minimum fit[subj] <- myfit$objective #fit of optimizing program } } else { fit[subj] <- NA theta[subj] <- NA } #end if else } if((!is.null(keys)) & (all(keys[,f] == -1) )) {theta <- -theta total <- -total} theta[theta bounds[2]] <- bounds[2] scores <- list(theta,total, fit) return(scores) } #end of big function ##the start of the irt.poly.function after setting up the various subfunctions min.item <- min(items,na.rm=TRUE) #uses local minima --probably problematic for small number of items items <- items - min.item #this converts scores to positive values from 0 up max.item <- max(items,na.rm=TRUE) #we use this when reverse score -- but note that this is not the original max value. We will adjust this in total nf <- length(stats$difficulty) n.obs <- dim(items)[1] nvar <- dim(items)[2] #mcmapply for parallel, mappy for debugging scores <- mcmapply(big.poly,1:nf,MoreArgs=list(n.obs=n.obs,stats=stats,items=items,keys=keys,cut=.3,bounds=bounds,mod=mod)) scores <- matrix(unlist(scores),ncol=nf*3) scores <- scores[,c(seq(1,nf*3,3),seq(2,nf*3+1,3),seq(3,nf*3 +2,3))] colnames(scores) <- paste(rep(c("theta","total","fit"),each=nf),1:nf,sep="") return(scores) } #end of score.irt.poly #added the tau option in switch in case we have already done irt.tau 6/29/16 "score.irt" <- function(stats=NULL,items,keys=NULL,cut=.3,bounds=c(-4,4),mod="logistic") { if(!is.null(keys) && (is.vector(keys))) keys <- matrix(keys) if (length(class(stats)) > 1) { if(!is.null(keys) && is.vector(keys)) keys <- as.matrix(keys) obnames <- cs(irt.poly, irt.fa, fa,tau) value <- inherits(stats, obnames, which=TRUE) if (any(value > 1)) {value <- obnames[which(value >0)]} else {value <- "none"} switch(value, irt.poly = {scores <- score.irt.poly(stats$irt,items,keys,cut,bounds=bounds,mod=mod) }, irt.fa = {scores <- score.irt.2(stats$irt,items,keys,cut,bounds=bounds,mod=mod)}, fa = {tau <- irt.tau(items) #this is the case of a factor analysis to be applied to irt nf <- dim(stats$loadings)[2] diffi <- list() for (i in 1:nf) {diffi[[i]] <- tau/sqrt(1-stats$loadings[,i]^2) } discrim <- stats$loadings/sqrt(1-stats$loadings^2) class(diffi) <- NULL class(discrim) <- NULL new.stats <- list(difficulty=diffi,discrimination=discrim) scores <- score.irt.poly(new.stats,items,keys,cut,bounds=bounds)}, tau = {tau <- stats #get the tau stats from a prior run if(is.matrix(keys)) {nf <- dim(keys)[2]} else {nf <-1} diffi <- list() for (i in 1:nf) {diffi[[i]] <- tau } discrim <- keys class(diffi) <- NULL class(discrim) <- NULL new.stats <- list(difficulty=diffi,discrimination=discrim) if(dim(tau)[2] ==1) {scores <- score.irt.2(stats=new.stats,items=items,keys=keys,cut=cut,bounds=bounds)} else { scores <- score.irt.poly(stats=new.stats,items=items,keys=keys,cut=cut,bounds=bounds)} } ) #we should have a null case } else {#input is a keys matrix tau <- irt.tau(items) #this is the case of a using a scoring matrix to be applied to irt if(is.matrix(keys)) {nf <- dim(keys)[2]} else {nf <-1} diffi <- list() for (i in 1:nf) {diffi[[i]] <- tau } if(!is.null(keys)) {discrim <- keys} else {stop("I am sorry, you specified tau but not keys.")} class(diffi) <- NULL class(discrim) <- NULL new.stats <- list(difficulty=diffi,discrimination=discrim) if(dim(tau)[2] ==1) {scores <- score.irt.2(stats=new.stats,items=items,keys=keys,cut=cut,bounds=bounds)} else { scores <- score.irt.poly(stats=new.stats,items=items,keys=keys,cut=cut,bounds=bounds)} } scores <- data.frame(scores) return(scores) } #find tau from dichotomous or polytomous data without bothering to find the correlations #useful for score.irt "irt.tau" <- function(x) { x <-as.matrix(x) nvar <- dim(x)[2] xt <- table(x) nvalues <- length(xt) #find the number of response alternatives if(nvalues ==2) {tau <- -qnorm(colMeans(x,na.rm=TRUE)) tau <- as.matrix(tau) rownames(tau) <- colnames(x)} else { if(nvalues > 10) stop("You have more than 10 categories for your items, polychoric is probably not needed") xmin <- min(x,na.rm=TRUE) xfreq <- apply(x- xmin+ 1,2,tabulate,nbins=nvalues) n.obs <- colSums(xfreq) xfreq <- t(t(xfreq)/n.obs) tau <- qnorm(apply(xfreq,2,cumsum))[1:(nvalues-1),] #these are the normal values of the cuts if(!is.matrix(tau)) tau <- matrix(tau,ncol=nvar) rownames(tau) <- names(xt)[1:(nvalues-1)] colnames(tau) <- colnames(x) if(dim(tau)[1] < dim(tau)[2]) tau <- t(tau) #rows are variables, columns are subjects } class(tau) <- c("psych","tau") #added the tau class so score.irt can use the tau values return(tau) } #added August 6, 2012 "irt.responses" <- function(theta,items, breaks = 11,show.missing=FALSE,show.legend=TRUE,legend.location="topleft",colors=NULL,...) { #if(is.null(colors)) colors =c("gray0", "blue3", "red3", "darkgreen", "gold2", "gray50", "cornflowerblue", "mediumorchid2") if(is.null(colors)) colors =c("black", "blue", "red", "darkgreen", "gold2", "gray50", "cornflowerblue", "mediumorchid2") #legend.location <- c("bottomright", "bottom", "bottomleft", "left", "topleft", "top", "topright", "right", "center","none") #uniqueitems <- unique(as.vector(unlist(items))) item.counts <- names(table(as.vector(unlist(items)))) uniqueitems <- as.numeric(item.counts) nalt <- length(uniqueitems) + 1 #include the missing value from response.frequencies nvar <- ncol(items) theta.min <- min(theta,na.rm=TRUE) theta.max <- max(theta,na.rm=TRUE) binrange <- cut(theta, breaks = breaks) binnums <- as.numeric(binrange) items <- as.matrix(items) stats <- by(items,binnums,function(x) response.frequencies(x,uniqueitems=uniqueitems)) stats.m <- unlist(stats) stats.m <- matrix(stats.m,ncol=nvar*nalt,byrow=TRUE) theta <- seq(theta.min,theta.max,length.out=breaks) for (i in 1:nvar) { plot(theta,stats.m[,i],ylim=c(0,1),typ="l",xlab="theta",ylab="P(response)",main=paste(colnames(items)[i]),col=colors[1],...) for(j in 1:(nalt-2+show.missing)) { points(theta,stats.m[,i+nvar*j],typ="l",lty=(j+1),col=colors[j+1 ],...) } if(show.legend) { legend(legend.location, paste(item.counts[1:(nalt-1 + show.missing)]), text.col = colors[1:(nalt-1+show.missing)], lty = 1:(nalt-1+show.missing), ncol=4,bty="n")} }} psych/R/circ.tests.R0000644000176200001440000000326613501462763014023 0ustar liggesusers"circ.tests" <- function(loads,loading=TRUE,sorting=TRUE) { cl <- match.call() circ.gap <- function(loads,loading=TRUE,sorting=TRUE) { if (loading) {l <- loads$loadings} else { l <- loads} l<- l[,1:2] commun=rowSums(l*l) theta=sign(l[,2])*acos(l[,1]/sqrt(commun)) #vector angle in radians if(sorting) {theta<- sort(theta)} gaps <- diff(theta) test <- var(gaps) return(test) } circ.fisher <- function(loads,loading=TRUE) { if (loading) {l <- loads$loadings} else { l <- loads} l<- l[,1:2] radius <- sqrt(rowSums(l^2)) #added sqrt 6/15/19 test <- sd(radius)/mean(radius) return (test) } circ.rt <- function(loads,loading=TRUE) { if (loading) {l <- loads$loadings} else { l <- loads} l<- l[,1:2] qmc <- rep(0,10) for (i in 0:9) {theta <- 5*i rl <- factor.rotate(l,theta,1,2) l2 <- rl*rl qmc[i] <- sum(apply(l2,1,var)) } test <- sd(qmc)/mean(qmc) } circ.v2 <- function(loads,loading=TRUE) { if (loading) {l <- loads$loadings} else { l <- loads} l<- l[,1:2] crit <- rep(0,10) for (i in 0:9) { theta <- 5*i rl <- factor.rotate(l,theta,1,2) l2 <- rl*rl suml2 <- sum(l2) crit[i] <- var(l2[,1]/suml2) } test <- sd(crit)/mean(crit) return (test) } gap.test <- circ.gap(loads,loading,sorting) fisher.test <- circ.fisher(loads,loading) rotation.test <- circ.rt(loads,loading) variance.test <- circ.v2(loads,loading) circ.tests <- list(gaps=gap.test,fisher=fisher.test,RT=rotation.test,VT=variance.test,Call=cl) class(circ.tests) <- c("psych","circ") return(circ.tests) }psych/R/densityBy.r0000644000176200001440000003236514477707436013772 0ustar liggesusers"violin" <- function(x,data=NULL,var=NULL,grp=NULL,grp.name=NULL, xlab=NULL, ylab=NULL,main="Density plot",vertical=TRUE, dots=FALSE, rain=FALSE,jitter=.05, alpha=1,errors=FALSE,eyes=TRUE,adjust=1, restrict=TRUE,xlim=NULL,add=FALSE,col=NULL,pch=20,scale=NULL, ...) {#calls violinBy which is now defaulg violinBy(x=x,data=data,var=var,grp=grp,grp.name=grp.name,xlab=xlab,ylab=ylab,main=main,vertical=vertical, dots=dots, rain=rain, jitter=jitter, alpha=alpha, errors=errors,eyes=eyes,adjust=adjust,restrict=restrict,xlim=xlim, add=add,col=col,pch=pch,scale=scale,...) } #switched from density = 50 to alpha =.5 to speed up the plotting "violinBy" <- function(x,var=NULL,grp=NULL,data=NULL,grp.name=NULL,xlab=NULL, ylab=NULL, main="Density plot",vertical=TRUE, dots=FALSE,rain=FALSE, jitter=.05,alpha= 1,errors=FALSE,eyes=TRUE,adjust=1, restrict=TRUE,xlim=NULL,add=FALSE,col=NULL,pch=20,scale=NULL, ...) { if(is.null(scale)) SCALE=.3 #how wide are the plots? count.valid <- function(x) {sum(!is.na(x)) } if(is.null(col)) {col <- c("blue","red","grey","purple","green","yellow")} #default colors formula <- FALSE if(inherits(x, "formula")) { ps <- fparse(x) formula <- TRUE if(is.null(data)) {data <- get(ps$y) var <- NULL #we specified the entire dataframe} else { #stop("You must specify the data if you are using formula input") x <- data} else {x <- data[,ps$y,drop=FALSE] } #added the drop = FALSE 11/28/20 grp <- data[ps$x] if(is.null(xlab)) xlab <- ps$x names <- ps$x grp.n <- ps$x if(is.null(ylab)) ylab <- ps$y } if(!formula) {ps <- list(x = grp, y = x) #convert from x, grp to y ~ x format if(is.null(data)) {data <- x if(!is.null(var)) {data <- data[,var, drop=FALSE]} var <- ps$y <- colnames(data) if(is.null(var)) var <- paste("V",1:NCOL(data)) # colnames(x) <- colnames(data) <- ps$y <- var} colnames(data) <- ps$y <- var} if(!is.null(grp)) {x <- cbind(data[,ps$y,drop=FALSE], x[,grp,drop=FALSE])} else { x <- data[,ps$y,drop=FALSE] } data <- x if(is.null(ylab)) ylab = "Observed " if(is.null(xlab)) xlab = "" } if(!is.null(grp)) { if(!is.data.frame(grp) && !is.list(grp) && (length(grp) < NROW(x))) grp <- x[,grp,drop=FALSE]} if(!is.null(var)) {if(missing(ylab) & (length(var) ==1)) {ylab <- var} x <- x[,var , drop = FALSE]} nvar <- nvarg <- NCOL(x) x <- char2numeric(x) col <- adjustcolor(col,alpha.f =alpha) #if(!is.null(grp)) { # if(!is.data.frame(grp) && !is.list(grp) && (length(grp) < NROW(x))) grp <- x[,grp] if(!is.null(grp)) {stats <- describeBy(data[,ps$y],group = data[,ps$x],quant=c(0,1,.5,.25,.75),mat=TRUE,skew=FALSE) } else { stats <- describe(data[,ps$y,drop=FALSE],quant=c(0,1,.5,.25,.75),skew=FALSE)} meanX <- stats[,"mean"] minx <- stats[,"min"] maxx <- stats[,"max"] medx <- stats[,"Q0.5"] Q25 <- stats[,"Q0.25"] Q75 <- stats[,"Q0.75"] nvarg <- NROW(stats) n.col <- length(col) nX <- stats[,"n"] col <- rep(col,nvarg) #too many, but this will do # Qnt <- apply(x,2,function(xx) by(xx,grp,quantile,prob=c(0,1,.5,.25,.75),na.rm=TRUE)) # meanX <- apply(x,2,function(xx) by(xx,grp,mean,na.rm=TRUE)) # nX <- apply(x,2,function(xx) by(xx,grp,count.valid)) # meanX <- matrix(unlist(meanX)) # Qnt <- matrix(unlist(Qnt),nrow=5) # ngrp <- ncol(Qnt)/nvar # nvarg <- ncol(Qnt) # rangex <- matrix(c(Qnt[1,],Qnt[2,]),nrow=2,byrow=TRUE) # } else {Qnt <- apply(x,2,quantile,prob=c(0,1,.5,.25,.75),na.rm=TRUE) # meanX <- apply(x,2,mean,na.rm=TRUE)} # minx <- Qnt[1,] # maxx <- Qnt[2,] # medx <- Qnt[3,] # Q25 <- Qnt[4,] # Q75 <- Qnt[5,] # #rangex <- apply(x,2,range,na.rm=TRUE) # rangex <- matrix(c(Qnt[1,],Qnt[2,]),nrow=2,byrow=TRUE) tot.n.obs <- nrow(x) names <- grp.name if(is.null(grp.name) & !is.null(grp)) { grp.name <- paste(ps$x[1],stats[,"group1"]) names <- grp.name } else {if(length(grp.name) != NROW(stats)) names <- rownames(stats)} # names <- rep(grp.name, length(ps$x)) # #if(length(ps$x) > 1) ) ) {names <- paste(rep(names,each=ngrp),grp.name[1:ngrp],sep=" ")} else {names <- grp.name} # if(length(ps$y) > 1) names <- rep(names,length(ps$x)) # col <- rep(col,nvar* ngrp) d <- list(nvar) if(is.null(xlim)) xlim <- c(.5,nvarg+.5) for (i in 1:nvar) { #if(!is.null(grp)) { if(restrict) {d[[i]] <- by(x[,i], grp ,function(xx) density(xx,na.rm=TRUE,from=rangex[1,i],to=rangex[2,i]))} if(!is.null(grp)) { if(restrict) {d[[i]] <- by(x[,i], grp ,function(xx) density(xx,na.rm=TRUE,adjust=adjust,from=min(xx,na.rm=TRUE),to=max(xx,na.rm=TRUE)))} else { d[[i]] <- by(x[,i], grp ,function(xx) density(xx,na.rm=TRUE)) }} else { if(restrict) {d[[i]] <- density(x[,i],na.rm=TRUE,adjust=adjust,from=minx[i],to=maxx[i])} else { d[[i]] <- density(x[,i],na.rm=TRUE)} } } if(!add) {if(vertical){plot(1:nvarg,meanX,ylim=c(min(minx),max(maxx)),xlim=xlim,axes=FALSE,xlab=xlab,ylab=ylab,main=main,pch=pch,...) axis(1,1:nvarg,names,...) axis(2,...)} else {plot(meanX,1:nvarg,xlim=c(min(minx),max(maxx)),ylim=c(.5,nvarg+.5),axes=FALSE,xlab=ylab,ylab=xlab,main=main,pch=pch,...) axis(1,...) axis(2, 1:nvarg,names,...)} box()} if(!is.null(grp)) d <- unlist(d,recursive=FALSE) rev <- (length(d[[1]]$y):1) #this prevents a line down the middle for(i in 1:nvarg) { if(!is.null(scale)) {width <- scale*sqrt(nX[[i]]/tot.n.obs)/max(d[[i]]$y)} else {width <- SCALE/max(d[[i]]$y)} #polygon(width*c(-d[[i]]$y,d[[i]]$y[rev])+i,c(d[[i]]$x,d[[i]]$x[rev]),density=density,col=col[i],...) #if(vertical) {polygon(width*c(-d[[i]]$y, d[[i]]$y[rev])+i, c(d[[i]]$x, d[[i]]$x[rev]),col=col[i ],...)} else { # polygon(c(d[[i]]$x, d[[i]]$x[rev]), width* c(-d[[i]]$y, d[[i]]$y[rev])+i,col=col[i],...)} zero <- rep(0,length(d[[i]]$x)) if(rain) {if(vertical) {polygon(width*c(d[[i]]$y, zero)+i,c( d[[i]]$x,d[[i]]$x[rev] ),col=col[i ],...)} else { polygon(c(d[[i]]$x, d[[i]]$x[rev]), width* c(d[[i]]$y, zero)+i,col=col[i],...)}} else { #not rain if(vertical) {polygon(width*c(-d[[i]]$y, d[[i]]$y[rev])+i, c(d[[i]]$x, d[[i]]$x[rev]),col=col[i ],...)} else { polygon(c(d[[i]]$x, d[[i]]$x[rev]), width* c(-d[[i]]$y, d[[i]]$y[rev])+i,col=col[i],...)}} # if(vertical) {polygon(width*c(-d[[i]]$y, d[[i]]$y[rev])+i, c(d[[i]]$x, d[[i]]$x[rev]+i),col=col[i ],...)} else { # polygon(c(d[[i]]$x, d[[i]]$x[rev]), width* c(-d[[i]]$y, d[[i]]$y[rev])+i,col=col[i],...)}} #draw the quartiles and medians dmd <- max(which(d[[i]]$x <= medx[i])) d25 <- max(which(d[[i]]$x <= Q25[i])) d75 <- max(which(d[[i]]$x <= Q75[i])) if(vertical) { segments(x0=width*d[[i]]$y[dmd] +i ,y0=d[[i]]$x[dmd],x1=-(1-rain)*width*d[[i]]$y[dmd]+i,y1=d[[i]]$x[dmd],lwd=2) segments(x0=width*d[[i]]$y[d25] +i ,y0=d[[i]]$x[d25],x1=-(1-rain)*width*d[[i]]$y[d25]+i,y1=d[[i]]$x[d25]) segments(x0=width*d[[i]]$y[d75] +i ,y0=d[[i]]$x[d75],x1=-(1-rain)*width*d[[i]]$y[d75]+i,y1=d[[i]]$x[d75]) } else { segments(y0=width*d[[i]]$y[dmd] +i ,x0=d[[i]]$x[dmd],y1=(rain-1)*width*d[[i]]$y[dmd]+i,x1=d[[i]]$x[dmd],lwd=2) segments(y0=width*d[[i]]$y[d25] +i ,x0=d[[i]]$x[d25],y1=(rain-1)*width*d[[i]]$y[d25]+i,x1=d[[i]]$x[d25]) segments(y0=width*d[[i]]$y[d75] +i ,x0=d[[i]]$x[d75],y1=(rain-1)*width*d[[i]]$y[d75]+i,x1=d[[i]]$x[d75]) } } if(errors) { #currently only works for one DV and one IV if(vertical) { error.bars.by(x[,1],group=grp[,1],add=TRUE,lines=FALSE,eyes=eyes,v.labels=names,...) } else { message("error bars are not implemented for horizontal plots")} } if(dots) { if(!is.null(grp)) { if(jitter >0) {if(length(ps$x) > 1 ) {stripchart(x[,ps$y]~ grp[,ps$x[1] ] +grp[,ps$x[2]] ,vertical=vertical,method="jitter",jitter=jitter,add=TRUE,pch=pch,...)} else { stripchart(x[,ps$y]~ grp[,ps$x[1] ] ,vertical=vertical,method="jitter",jitter=jitter,add=TRUE,pch=pch,...) }} else { if(length(ps$x) > 1 ) {stripchart(x[,ps$y]~ grp[,ps$x[1]]+ grp[,ps$x[,2]],vertical=vertical,add=TRUE,pch=pch,...)} else { {stripchart(x[,ps$y]~ grp[,ps$x[1]],vertical=vertical,add=TRUE,pch=pch,...)} }} } else { if(jitter >0) {stripchart(x ,vertical=vertical,method="jitter",jitter=jitter,add=TRUE,pch=pch,...)} else { stripchart(x ,vertical=vertical,method="jitter",jitter=jitter,add=TRUE,pch=pch,...) }} } if(rain) { for(i in 1:nvar) {if(!is.null(grp)) { tempat <- 1:length(table(grp)) -.1 + (i-1)*nvar if(jitter >0) {if(length(ps$x) > 1 ) {stripchart(x[,ps$y[i]]~ grp[,ps$x[1] ] +grp[,ps$x[2]] ,vertical=vertical,method="jitter",jitter=jitter,add=TRUE,pch=pch, at=tempat,...)} else { stripchart(x[,ps$y[i]]~ grp[,ps$x[1] ] ,vertical=vertical,method="jitter",jitter=jitter,add=TRUE,pch=pch,at=tempat, ...) }} else { if(length(ps$x) > 1 ) {stripchart(x[,ps$y[i]]~ grp[,ps$x[1]]+ grp[,ps$x[,2]],vertical=vertical,add=TRUE,pch=pch,...)} else { {stripchart(x[,ps$y[i]]~ grp[,ps$x[1]],vertical=vertical,add=TRUE,pch=pch,...)} }} } else { tempat <- 1:nvar -.1 + (i-1)*nvar if(jitter >0) {stripchart(x ,vertical=vertical,method="jitter",jitter=jitter,add=TRUE,pch=pch,at=tempat,...)} else { stripchart(x ,vertical=vertical,method="jitter",jitter=jitter,add=TRUE,pch=pch,at=tempat,...) }} } } } #created March 10, 2014 following a discussion of the advantage of showing distributional values #modified March 22, 2014 to add the grouping variable #basically just a violin plot. #modified December, 2016 to allow for scaling of the widths of the plots by sample size. #modified December 2020 to provide stripcharts and errorbar options # # "histBy" <- function(x,grp=NULL,data=NULL,restrict=TRUE,xlim=NULL,ylab="Observed",xlab="",main="Density plot",density=20,scale=TRUE,col= c("blue","red"),...) { # count.valid <- function(x) {sum(!is.na(x)) } # formula <- FALSE # if(inherits(x, "formula")) { ps <- fparse(x) # formula <- TRUE # if(is.null(data)) stop("You must specify the data if you are using formula input") # x <- data[ps$y] # grp <- data[ps$x] # } # if( #(col)) {col <- c("blue","red")} # if(restrict) {minx <- min(x,na.rm=TRUE) # maxx <- max(x,na.rm=TRUE)} # x <- as.matrix(x,drop=FALSE) # meanX <- apply(x,2,mean,na.rm=TRUE) # nX <- apply(x,2,function(xx) by(xx,grp,count.valid)) # # if(!is.null(grp)) { if(restrict) {d <- by(x[,1], grp ,function(xx) density(xx,na.rm=TRUE,from=min(xx,na.rm=TRUE),to=max(xx,na.rm=TRUE)))} else { # d <- by(x[,1], grp ,function(xx) density(xx,na.rm=TRUE)) }} else { # if(restrict) {d <- density(x[,1],na.rm=TRUE,from=minx,to=maxx)} else { # d <- density(x[,1],na.rm=TRUE)} } # d <- unlist(d,recursive=FALSE) # maxy <- max(d[[1]]$y,d[[2]]$y,na.rm=TRUE) # plot(NA,ylim=c(0,maxy),xlim=xlim,axes=FALSE,xlab=xlab,ylab=ylab,main=main,pch=pch,...) # axis(1,1:nvarg,names,...) # axis(2,...) # box() # if(!is.null(scale)) {width <- scale*sqrt(nX[[i]]/tot.n.obs)/max(d[[i]]$y)} else {width <- SCALE/max(d[[i]]$y)} # polygon(width*c(-d[[i]]$x,d[[i]]$x[rev])+i,c(d[[i]]$y,d[[i]]$y[rev]),density=density,col=col[i],...) # } #11/28/17 #Improved 09/02/20 "densityBy" <- function(x,var=NULL,grp=NULL,data=NULL,freq=FALSE,col=c("blue","red","black"),alpha=.5,adjust=1,ylim=NULL,xlim=NULL,xlab="Variable", ylab="Density",main="Density Plot",legend=NULL) { formula <- FALSE if(inherits(x, "formula")) { ps <- fparse(x) formula <- TRUE if(is.null(data)) stop("You must specify the data if you are using formula input") var <- ps$y grp <- ps$x x <- data } n.grp <- length(table(x[grp])) if(length(col) < n.grp) col <- rainbow(n.grp) if(!is.null(var)) x <- x[,c(var,grp),drop=FALSE] x <- char2numeric(x) col <- adjustcolor(col,alpha.f =alpha) if(missing(main) && freq) main="Frequency Plot" if(missing(ylab) && freq) ylab <- "N * density" if(missing(xlab) && (length(var) ==1)) xlab <- var if(!is.null(grp)) { if(!is.data.frame(grp) && !is.list(grp) && (length(grp) < NROW(x))) grp <- x[,grp,drop=FALSE] d <- by(x[,var],grp,function(xx) density(xx, adjust=adjust,na.rm=TRUE)) quants <-by(x[,var],grp,function(xx) quantile(xx,c(.25,.5,.75),na.rm=TRUE)) maxiy <- rep(NA,length(d)) rangex <- matrix(NA, ncol=2,nrow= length(d)) for(i in 1:length(d) ) {if(freq){ maxiy[i] <- max(d[[i]]$n *d[[i]]$y)} else { maxiy[i] <- max(d[[i]]$y)} rangex[i,] <- range(d[[i]]$x) } maxy <- max(maxiy) ranges <- range(rangex) if(missing(ylim)) ylim <- c(0,maxy) if(missing(xlim)) xlim <- ranges plot(d[[1]],ylim=ylim,xlim=xlim,main=main,ylab=ylab,xlab=xlab ) for (i in 1:length(d)) { if(freq) {scal <- d[[i]]$n d[[i]]$y <- d[[i]]$y*scal} polygon(d[[i]] ,col=col[i]) #find the y value corresponding to x median medy <- max(which(d[[i]]$x <= quants[[i]][2])) segments(x0 = quants[[i]][2],y0=0,x1=quants[[i]][2],y1=d[[i]]$y[medy], lty= i) } } else { d <- density(x[,var],na.rm=TRUE,adjust=adjust) plot(d,main=main,ylab=ylab,xlab=xlab ) polygon(d,col=col[1]) } if(!missing(legend)) { location <- c("topleft","topright","top","left","right") grp.names <- paste(names(grp),names(table(grp))) leg.text <- grp.names legend(location[legend],legend=leg.text,col=c(1:n.grp),fill=c(1:n.grp), lty=c(1:n.grp)) } invisible(d) }psych/R/describe.R0000744000176200001440000003021314550577332013517 0ustar liggesusers"describeFast" <- function(x) { if(!inherits(x[1], "data.frame")) x <- fix.dplyr(x) #to get around a problem created by dplyr nvar <- NCOL(x) nobs <- NROW(x) valid <- colSums(!is.na(x)) temp <- matrix(NA,nrow=nvar,ncol=4) for(i in 1:nvar) {temp[i,1] <- is.numeric(x[1,i]) temp[i,2] <- is.factor(x[1,i]) temp[i,3] <- is.logical(x[1,i]) temp[i,4] <- is.character(x[1,i]) } ttt <- which(temp[,1:4] == TRUE,arr.ind=TRUE) if(nvar > 1) { ttt <- psychTools::dfOrder(ttt,"row")} temp <- cbind(temp,ttt["col"]) colnames(temp) <- c("numeric","factor","logical","character","type") cc <- try(complete.cases(x),silent=TRUE) if(inherits(cc,"try-error")) cc <- NA cc <- sum(cc,na.rm=TRUE) all.numeric <- sum(temp[,1]) all.factor <- sum(temp[,2]) result.df <- data.frame(var=1:nvar,n.obs=valid,temp) result<- list(nvar=nvar,n.obs =nobs,complete.cases = cc,numeric=all.numeric,factors=all.factor,result.df=result.df) class(result) <- c("psych","describeFast") return(result) } #added 1/11/14 #modified 12/29/14 to handle cases with non-numeric data #for fast descriptions describeData <- function (x, head = 4, tail = 4) { valid <- function(x) { sum(!is.na(x)) } nvar <- ncol(x) all.numeric <- nvar ans <- matrix(NA,nrow=nvar,ncol=2) nobs <- nrow(x) cc <- 0 cc <- try(complete.cases(x),silent=TRUE) if(inherits(cc, "try-error")) cc <- NA cc <- sum(cc,na.rm=TRUE) for (i in 1:nvar) { if (is.numeric(x[,i])) {ans[i,2] <- 1 } else { if ((is.factor(x[,i])) || (is.logical(x[,i]))) { ans[i,2] <- 2 } else { if (is.character(x[,i])) { ans[i,2] <- 3 } else {ans[i,2] <- 4} } } ans[i,1] <- valid(x[,i]) } if (is.numeric(unlist(x))) { all.numeric <- TRUE } else { all.numeric <- FALSE } H1 <- t(x[1:head,1:nvar]) T1 <- t(x[(nobs-tail+1):nobs,1:nvar]) temp <- data.frame(V=1:nvar,ans,H1,T1) colnames(temp) <- c("variable #", "n.obs", "type", paste("H", 1:head, sep = ""), paste("T", 1:tail, sep = "")) rownames(temp)[temp[,"type"]!=1] <- paste(rownames(temp)[temp[,"type"]!=1],"*",sep="") result <- (list(n.obs = nobs, nvar = nvar, all.numeric = all.numeric, complete.cases = cc, variables = temp)) class(result) <- c("psych", "describeData") return(result) } #changed October 12, 2011 to use apply because mean and sd are deprecated for data.frames #modified January 10, 2014 to add the check option to improve speed. A few other improvements #modified December 2014 to add the fast option for large data sets #modified May 21, 2015 to allow non-numeric data to be described (but with a warning) #further modified June 21, 2016 to allow for character input as well as well reporting quantiles #tried to improve the speed by using multicores, but this requires using s or lapply which don't do what I need. #Added the as.factor to the convert to numeric for non-numeric data #Improved July 9, 2020 to allow formula input for grouping variables #Further improved Sept 02, 2020 to allow formula input with data specified to match other formula features "describe" <- function (x,na.rm=TRUE,interp=FALSE,skew=TRUE,ranges=TRUE,trim=.1,type=3,check=TRUE,fast=NULL,quant=NULL,IQR=FALSE,omit=FALSE,data=NULL) #basic stats after dropping non-numeric data #slightly faster if we don't do skews { if(inherits(x,"formula")) {ps <- fparse(x) #group was specified, call describeBy if(missing(data)) { x <- get(ps$y) group <- x[,ps$x]} else {x <- data[ps$y] group <- data[ps$x] } describeBy(x,group=group,na.rm=na.rm,interp=interp,skew=skew,ranges=ranges,trim=trim,type=type,check=check,fast=fast,quant=quant,IQR=IQR,omit=omit,data=data) } else { cl <- match.call() #first, define a local function valid <- function(x) {sum(!is.na(x))} if(!na.rm) x <- na.omit(x) #just complete cases if(is.null(fast)) { if (prod(dim(x)) > 10^7) {fast <- TRUE } else {fast <- FALSE}} #the default is to use fast for large data sets if(fast) {skew <- FALSE } numstats <- 10 + length(quant) + IQR cn <- "X1" if ( NCOL(x) < 2) {if(is.data.frame(x)) { # if( !is.numeric(x[,1])) {warning ("You were trying to describe a non-numeric data.frame or vector which describe converted to numeric.") x[,1] <- as.numeric(as.factor(x[,])) } if(!is.null(colnames(x))) {cn <- colnames(x)} else {cn <- "X1"} x <- x[,1] } #getting around the problem of single column data frames #do it for vectors or len <- 1 nvar <- 1 stats = matrix(rep(NA,numstats),ncol=numstats) #create a temporary array stats[1, 1] <- valid(x ) stats[1, 2] <- mean(x, na.rm=na.rm ) stats[1,10] <- sd(x,na.rm=na.rm) if(interp) {stats[1, 3] <- interp.median(x,na.rm=na.rm ) } else {stats[1,3] <- median(x,na.rm=na.rm) } stats[1,9] <- mean(x,na.rm=na.rm, trim=trim) stats[1, 4] <- min(x, na.rm=na.rm ) stats[1, 5] <- max(x, na.rm=na.rm ) stats[1, 6] <- skew(x,na.rm=na.rm,type=type ) stats[1,7] <- mad(x,na.rm=na.rm) stats[1,8] <- kurtosi(x,na.rm=na.rm,type=type) vars <- 1 if(!is.null(quant)) { Qnt <- quantile(x,prob=quant,na.rm=TRUE) stats[1,(IQR+11):numstats] <- t(Qnt)} if(IQR) {Quart <- t(quantile(x,prob=c(.25,.75),na.rm=TRUE)) Iqr <- Quart[,2] -Quart[,1] stats[1,11] <- Iqr } rownames(stats) <- cn } else { nvar <- ncol(x) stats = matrix(rep(NA,nvar*numstats),ncol=numstats) #create a temporary array if(is.null(colnames(x))) colnames(x) <- paste0("X",1:ncol(x)) rownames(stats) <- colnames(x) stats[,1] <- apply(x,2,valid) vars <- c(1:nvar) ##adapted from the pairs function to convert logical or categorical to numeric select <- 1:nvar if(!is.matrix(x) && check) { #does not work for matrices for(i in 1:nvar) { if(!is.numeric(x[[i]] )) { if(fast) {x[[i]] <- NA} else { if(omit) {select[i] <- NA} if(is.factor(unlist(x[[i]])) | is.character(unlist(x[[i]]))) { x[[i]] <- as.numeric(as.factor(x[[i]])) rownames(stats)[i] <- paste(rownames(stats)[i],"*",sep="") } else {x[[i]] <- NA} } } } } select <- select[!is.na(select)] x <- as.matrix(x[,select]) vars <- vars[select] stats <- stats[select,] if(!is.numeric(x)) {message("Converted non-numeric matrix input to numeric. Are you sure you wanted to do this. Please check your data") x <- matrix(as.numeric(x),ncol=nvar) rownames(stats) <- paste0(rownames(stats),"*")} stats[,2] <- apply(x, 2,mean,na.rm=na.rm ) stats[,10] <- apply(x,2,sd,na.rm=na.rm) if (skew) {stats[, 6] <- skew(x,na.rm=na.rm,type=type ) stats[,8] <- kurtosi(x,na.rm=na.rm,type=type)} if(ranges) { if(fast) { stats[,4] <- apply(x,2,min,na.rm=na.rm) stats[,5] <- apply(x,2,max,na.rm = na.rm) } else { stats[, 4] <- apply(x,2,min, na.rm=na.rm ) stats[, 5] <- apply(x,2,max, na.rm=na.rm ) stats[,7] <- apply(x,2,mad, na.rm=na.rm) stats[,9] <- apply(x,2, mean,na.rm=na.rm,trim=trim) if(interp) {stats[, 3] <- apply(x,2,interp.median,na.rm=na.rm ) } else {stats[,3] <- apply(x,2,median,na.rm=na.rm) } }} if(!is.null(quant)) { Qnt <- apply(x,2,quantile,prob=quant,na.rm=TRUE) stats[,(IQR+11):numstats] <- t(Qnt)} if(IQR) {Quart <- t(apply(x,2,quantile,prob=c(.25,.75),na.rm=TRUE)) Iqr <- Quart[,2] - Quart[,1] stats[,11] <- Iqr } } #end of maxtrix input #now summarize the results if (numstats > (10 + IQR)) { colnames(stats)[(11+IQR):numstats] <- paste0("Q",quant[1:length(quant)])} #the following output was cleaned up on June 22, 2016 added the quantile information. #the various options are ranges, skew, fast, numstats > 10 if(fast) { answer <- data.frame(vars=vars,n = stats[,1],mean=stats[,2], sd = stats[,10],se=stats[,10]/sqrt(stats[,1])) } #minimal case #if((!skew) && ranges) {answer <- data.frame(vars=vars,n = stats[,1],mean=stats[,2], sd = stats[,10],min= stats[,4],max=stats[,5], range=stats[,5]-stats[,4],se=stats[,10]/sqrt(stats[,1])) } if(skew) { if(ranges) { answer <- data.frame(vars=vars,n = stats[,1],mean=stats[,2], sd = stats[,10], median = stats[, 3],trimmed =stats[,9], mad = stats[,7], min= stats[,4],max=stats[,5], range=stats[,5]-stats[,4],skew = stats[, 6], kurtosis = stats[,8],se=stats[,10]/sqrt(stats[,1])) } else { answer <- data.frame(vars=vars,n = stats[,1],mean=stats[,2], sd = stats[,10],skew = stats[, 6], kurtosis = stats[,8],se=stats[,10]/sqrt(stats[,1])) } } else {if(ranges) {answer <- data.frame(vars=vars,n = stats[,1],mean=stats[,2], sd = stats[,10],min= stats[,4],max=stats[,5], range=stats[,5]-stats[,4],se=stats[,10]/sqrt(stats[,1])) } else { answer <- data.frame(vars=vars,n = stats[,1],mean=stats[,2], sd = stats[,10],se=stats[,10]/sqrt(stats[,1])) } } if(IQR) answer <- data.frame(answer,IQR=stats[,11]) if (numstats > (10+ IQR)) {if(nvar > 1 ) {answer <- data.frame(answer, stats[,(IQR+11):numstats]) #add the quantile information } else { answer <- data.frame(answer, t(stats[,(IQR+11):numstats])) rownames(answer) <- cn} } # {if (ranges) { # if(skew){ # # # if(numstats > 10) { answer <- data.frame(vars=vars,n = stats[,1],mean=stats[,2], sd = stats[,10], median = stats[, 3],trimmed =stats[,9], mad = stats[,7], min= stats[,4],max=stats[,5], # range=stats[,5]-stats[,4],skew = stats[, 6], kurtosis = stats[,8], se=stats[,10]/sqrt(stats[,1]), stats[,(11:numstats)])} else { # answer <- data.frame(vars=vars,n = stats[,1],mean=stats[,2], sd = stats[,10], median = stats[, 3],trimmed =stats[,9], mad = stats[,7], min= stats[,4],max=stats[,5], # range=stats[,5]-stats[,4],skew = stats[, 6], kurtosis = stats[,8],se=stats[,10]/sqrt(stats[,1]))} #the typical (maximum) case # } else { # if(!fast) { # if(numstats > 10) { # answer <- data.frame(vars=vars,n = stats[,1],mean=stats[,2], sd = stats[,10], median = stats[,3],trimmed =stats[,9], # mad = stats[,7],min= stats[,4],max=stats[,5], range=stats[,5]-stats[,4],se=stats[,10]/sqrt(stats[,1])) #somewhat shorter # } else {answer <- data.frame(vars=vars,n = stats[,1],mean=stats[,2], sd = stats[,10],min= stats[,4],max=stats[,5], range=stats[,5]-stats[,4],se=stats[,10]/sqrt(stats[,1])) #even shorter # } # }} else { # if(skew){answer <- data.frame(vars,n = stats[,1],mean=stats[,2], sd =stats[,10],skew = stats[, 6], kurtosis = stats[,8],se=stats[,10]/sqrt(stats[,1]))} else { # answer <- data.frame(vars=vars,n = stats[,1],mean=stats[,2], sd = stats[,10],se=stats[,10]/sqrt(stats[,1]))}} #the minimal case -- fixed 1/2/2014 # } # answer <-data.frame(var=vars,temp, se = temp$sd/sqrt(temp$n)) #replaced with forming the answer in the if statements 10/1/2014 to improve memory management class(answer) <- c("psych","describe","data.frame") return(answer) } } #minor patch on 9/10/23 for case of single variables with quantspsych/R/factor.scores.R0000644000176200001440000001531214443664121014506 0ustar liggesusers"factor.scores" <- function(x,f,Phi=NULL,method=c("Thurstone","tenBerge","Anderson","Bartlett","Harman","components"),rho=NULL,missing=FALSE,impute="none") { #the normal case is f is the structure matrix and Phi is not specified #Note that the Grice formulas distinguish between Pattern and Structure matrices #I need to confirm that I am doing this if(length(method) > 1) method <- "tenBerge" #the default if(method=="regression") method <- "Thurstone" if(method %in% c("tenberge", "Tenberge","tenBerge","TenBerge")) method <- "tenBerge" if(length(class(f)) > 1) { if(inherits(f[2] ,"irt.fa" )) f <- f$fa } if(!is.matrix(f)) {Phi <- f$Phi f <- loadings(f) if(ncol(f)==1) {method <- "Thurstone"} } nf <- dim(f)[2] if(is.null(Phi)) Phi <- diag(1,nf,nf) if(dim(x)[1] == dim(f)[1]) {r <- as.matrix(x) square <- TRUE} else { square <- FALSE if(!is.null(rho)) {r <- rho } else { r <- cor(x,use="pairwise") #find the correlation matrix from the data }} S <- f %*% Phi #the Structure matrix switch(method, "Thurstone" = { w <- try(solve(r,S),silent=TRUE ) #these are the factor weights (see Grice eq. 5) if(inherits(w,"try-error")) {message("In factor.scores, the correlation matrix is singular, the pseudo inverse is used") # r <- cor.smooth(r) w <- Pinv(r) %*% S } # w <- try(solve(r,S),silent=TRUE) #redudant, this will fail again (pointed out by Chanlder McClellan 4/4/23) # if(inherits(w,"try-error")) {message("I was unable to calculate the factor score weights, factor loadings used instead") # w <- f} colnames(w) <- colnames(f) rownames(w) <- rownames(f) }, "tenBerge" = { #Following Grice equation 8 to estimate scores for oblique solutions (with a correction to the second line where r should r.inv L <- f %*% matSqrt(Phi) r.5 <- invMatSqrt(r) r <- cor.smooth(r) inv.r <- try(solve(r),silent=TRUE) if(inherits(inv.r, as.character("try-error"))) {warning("The tenBerge based scoring could not invert the correlation matrix, regression scores found instead") ev <- eigen(r) ev$values[ev$values < .Machine$double.eps] <- 100 * .Machine$double.eps r <- ev$vectors %*% diag(ev$values) %*% t(ev$vectors) diag(r) <- 1 w <- solve(r,f)} else { C <- r.5 %*% L %*% invMatSqrt(t(L) %*% inv.r %*% L) #note that this is the correct formula, per Grice personal communication w <- r.5 %*% C %*% matSqrt(Phi)} colnames(w) <- colnames(f) rownames(w) <- rownames(f) }, "Harman" = { #Grice equation 10 -- # m <- t(f) %*% f #factor intercorrelations m <- f %*% t(S) #should be this (the model matrix) Revised August 31, 2017 diag(m) <- 1 #Grice does not say this, but it is necessary to make it work! inv.m <- solve(m) # w <- f %*%inv.m w <- inv.m %*% f }, "Anderson" = { #scores for orthogonal factor solution will be orthogonal Grice Eq 7 and 8 I <- diag(1,nf,nf) h2 <- diag( f %*% Phi %*% t(f)) U2 <- 1 - h2 inv.U2 <- diag(1/U2) w <- inv.U2 %*% f %*% invMatSqrt(t(f) %*% inv.U2 %*% r %*% inv.U2 %*% f) colnames(w) <- colnames(f) rownames(w) <- rownames(f) }, "Bartlett" = { #Grice eq 9 # f should be the pattern, not the structure I <- diag(1,nf,nf) h2 <- diag( f %*% Phi %*% t(f)) U2 <- 1 - h2 inv.U2 <- diag(1/U2) w <- inv.U2 %*% f %*% (solve(t(f) %*% inv.U2 %*% f)) colnames(w) <- colnames(f) rownames(w) <- rownames(f) }, "none" = {w <- NULL}, "components" = {w <- try(solve(r,f),silent=TRUE ) #basically, just do the regression/Thurstone approach for components w <- f } ) #now find a few fit statistics if(is.null(w)) {results <- list(scores=NULL,weights=NULL)} else { R2 <- diag(t(w) %*% S) #this had been R2 <- diag(t(w) %*% f) Corrected Sept 1, 2017 if(any(R2 > 1) || (prod(!is.nan(R2)) <1) || (prod(R2) < 0) ) {#message("The matrix is probably singular -- Factor score estimate results are likely incorrect") R2[abs(R2) > 1] <- NA R2[R2 <= 0] <- NA } #if ((max(R2,na.rm=TRUE) > (1 + .Machine$double.eps)) ) {message("The estimated weights for the factor scores are probably incorrect. Try a different factor extraction method.")} r.scores <- cov2cor(t(w) %*% r %*% w) #what actually is this? if(square) { #that is, if given the correlation matrix class(w) <- NULL results <- list(scores=NULL,weights=w) results$r.scores <- r.scores results$R2 <- R2 #this is the multiple R2 of the scores with the factors } else { #missing <- rowSums(is.na(x)) if(missing && (impute !="none")) { x <- data.matrix(x) miss <- which(is.na(x),arr.ind=TRUE) if(impute=="mean") { item.means <- colMeans(x,na.rm=TRUE) #replace missing values with means x[miss]<- item.means[miss[,2]]} else { item.med <- apply(x,2,median,na.rm=TRUE) #replace missing with medians x[miss]<- item.med[miss[,2]]} #this only works if items is a matrix } if(method !="components") {if(impute!="none") {scores <- factorScoresSapa(weights=w, items = scale(x))} else { scores <- scale(x) %*% w }} else { #standardize the data before doing the regression if using factors, scores <- x %*% w} # for components, the data have already been zero centered and, if appropriate, scaled results <- list(scores=scores,weights=w) results$r.scores <- r.scores results$missing <- missing results$R2 <- R2 #this is the multiple R2 of the scores with the factors } } return(results) } #how to treat missing data? see score.item "matSqrt" <- function(x) { e <- eigen(x) e$values[e$values < 0] <- .Machine$double.eps sqrt.ev <- sqrt(e$values) #need to put in a check here for postive semi definite result <- e$vectors %*% diag(sqrt.ev) %*% t(e$vectors) result} "invMatSqrt" <- function(x) { e <- eigen(x) if(is.complex(e$values)) {warning("complex eigen values detected by invMatSqrt, results are suspect") result <- x } else { e$values[e$values < .Machine$double.eps] <- 100 * .Machine$double.eps inv.sqrt.ev <- 1/sqrt(e$values) #need to put in a check here for postive semi definite result <- e$vectors %*% diag(inv.sqrt.ev) %*% t(e$vectors) } result} psych/R/cortest.bartlett.R0000644000176200001440000000143312660156245015237 0ustar liggesusers"cortest.bartlett" <- function(R,n=NULL,diag=TRUE) { #message("Bartlett's test that correlation matrix is an identity matrix") if (dim(R)[1] != dim(R)[2]) {n <- dim(R)[1] message("R was not square, finding R from data") R <- cor(R,use="pairwise")} p <- dim(R)[2] if(!is.matrix(R) ) R <- as.matrix(R) #converts data.frames to matrices if needed if(is.null(n)) {n <- 100 warning("n not specified, 100 used") } if(diag) diag(R) <- 1 #this will make tests of factor residuals correct detR <- det(R) statistic <- -log(detR) *(n -1 - (2*p + 5)/6) df <- p * (p-1)/2 pval <- pchisq(statistic,df,lower.tail=FALSE) bartlett <- list(chisq = statistic, p.value =pval, df =df) return(bartlett) } psych/R/irt.discrim.R0000644000176200001440000000116012253362137014155 0ustar liggesusers"irt.discrim" <- function(item.diff,theta,items) { #find the item discrimination parameter (beta) #find the item discrimination parameter -- optimized by item.discrim irt.item.discrim <- function(x,diff,theta,scores) { fit <- -1*(log(scores/(1+exp(x*(diff-theta))) + (1-scores)/(1+exp(x*(theta-diff))))) mean(fit,na.rm=TRUE) } nitems <- length(item.diff) discrim <- matrix(NaN,nitems,2) for (i in 1:nitems) { item.fit <- optimize(irt.item.discrim,c(-5,5),diff=item.diff[i],theta=theta,scores = items[,i]) discrim[i,1] <- item.fit$minimum discrim[i,2] <- item.fit$objective} irt.discrim <- discrim } psych/R/factor.pa.R0000644000176200001440000002226612456326542013622 0ustar liggesusers"factor.pa" <- function(r,nfactors=1,residuals=FALSE,rotate="varimax",n.obs = NA,scores=FALSE,SMC=TRUE,missing=FALSE,impute="median", min.err = .001,digits=2,max.iter=50,symmetric=TRUE,warnings=TRUE,fm="pa") { cl <- match.call() .Deprecated("fa",msg="factor.pa is deprecated. Please use the fa function with fm=pa") ##first some functions that are internal to factor.minres #this does the ULS fitting "fit.residuals.ols" <- function(Psi,S,nf) { diag(S) <- 1- Psi eigens <- eigen(S) eigens$values[eigens$values < .Machine$double.eps] <- 100 * .Machine$double.eps #loadings <- eigen.loadings(eigens)[,1:nf] if(nf >1 ) { loadings <- eigens$vectors[,1:nf] %*% diag(sqrt(eigens$values[1:nf])) } else {loadings <- eigens$vectors[,1] * sqrt(eigens$values[1] ) } model <- loadings %*% t(loadings) residual <- (S - model)^2 diag(residual) <- 0 error <- sum(residual) } #this code is taken (with minor modification to make ULS) from factanal #it does the iterative calls to fit.residuals "min.res" <- function(S,nf) { S.smc <- smc(S) if(sum(S.smc) == nf) {start <- rep(.5,nf)} else {start <- 1- S.smc} res <- optim(start, fit.residuals.ols, method = "L-BFGS-B", lower = .005, upper = 1, control = c(list(fnscale = 1, parscale = rep(0.01, length(start)))), nf= nf, S=S ) Lambda <- FAout(res$par, S, nf) result <- list(loadings=Lambda,res=res) } #these were also taken from factanal FAout <- function(Psi, S, q) { sc <- diag(1/sqrt(Psi)) Sstar <- sc %*% S %*% sc E <- eigen(Sstar, symmetric = TRUE) L <- E$vectors[, 1L:q, drop = FALSE] load <- L %*% diag(sqrt(pmax(E$values[1L:q] - 1, 0)), q) diag(sqrt(Psi)) %*% load } FAfn <- function(Psi, S, q) { sc <- diag(1/sqrt(Psi)) Sstar <- sc %*% S %*% sc E <- eigen(Sstar, symmetric = TRUE, only.values = TRUE) e <- E$values[-(1L:q)] e <- sum(log(e) - e) - q + nrow(S) -e } ## now start the main function if((fm !="pa") & (fm != "minres")) {message("factor method not specified correctly, principal axes used") fm <- "pa" } n <- dim(r)[2] if (n!=dim(r)[1]) { n.obs <- dim(r)[1] if(scores) {x.matrix <- r if(missing) { #impute values miss <- which(is.na(x.matrix),arr.ind=TRUE) if(impute=="mean") { item.means <- colMeans(x.matrix,na.rm=TRUE) #replace missing values with means x.matrix[miss]<- item.means[miss[,2]]} else { item.med <- apply(x.matrix,2,median,na.rm=TRUE) #replace missing with medians x.matrix[miss]<- item.med[miss[,2]]} }} r <- cor(r,use="pairwise") # if given a rectangular matrix, then find the correlations first } else { if(!is.matrix(r)) { r <- as.matrix(r)} sds <- sqrt(diag(r)) #convert covariance matrices to correlation matrices r <- r/(sds %o% sds) } #added June 9, 2008 if (!residuals) { result <- list(values=c(rep(0,n)),rotation=rotate,n.obs=n.obs,communality=c(rep(0,n)),loadings=matrix(rep(0,n*n),ncol=n),fit=0)} else { result <- list(values=c(rep(0,n)),rotation=rotate,n.obs=n.obs,communality=c(rep(0,n)),loadings=matrix(rep(0,n*n),ncol=n),residual=matrix(rep(0,n*n),ncol=n),fit=0)} r.mat <- r Phi <- NULL colnames(r.mat) <- rownames(r.mat) <- colnames(r) if(SMC) { if(nfactors < n/2) {diag(r.mat) <- smc(r) } else {if (warnings) message("too many factors requested for this number of variables to use SMC, 1s used instead")} } orig <- diag(r) comm <- sum(diag(r.mat)) err <- comm i <- 1 comm.list <- list() if(fm=="pa") { while(err > min.err) #iteratively replace the diagonal with our revised communality estimate { eigens <- eigen(r.mat,symmetric=symmetric) eigens$values[ eigens$values < .Machine$double.eps] <- .Machine$double.eps #added May 14, 2009 to fix case of singular matrices #loadings <- eigen.loadings(eigens)[,1:nfactors] if(nfactors >1 ) {loadings <- eigens$vectors[,1:nfactors] %*% diag(sqrt(eigens$values[1:nfactors])) } else {loadings <- eigens$vectors[,1] * sqrt(eigens$values[1] ) } model <- loadings %*% t(loadings) new <- diag(model) comm1 <- sum(new) diag(r.mat) <- new err <- abs(comm-comm1) if(is.na(err)) {warning("imaginary eigen value condition encountered in factor.pa,\n Try again with SMC=FALSE \n exiting factor.pa") break} comm <- comm1 comm.list[[i]] <- comm1 i <- i + 1 if(i > max.iter) {if(warnings) {message("maximum iteration exceeded")} err <-0 } } } if(fm=="minres") { #added April 12, 2009 to do ULS fits uls <- min.res(r,nfactors) eigens <- eigen(r) #used for the summary stats result$par <- uls$res loadings <- uls$loadings } # a weird condition that happens with the Eysenck data #making the matrix symmetric solves this problem if(!is.double(loadings)) {warning('the matrix has produced imaginary results -- proceed with caution') loadings <- matrix(as.double(loadings),ncol=nfactors) } #make each vector signed so that the maximum loading is positive - probably should do after rotation #Alternatively, flip to make the colSums of loading positive if (FALSE) { if (nfactors >1) { maxabs <- apply(apply(loadings,2,abs),2,which.max) sign.max <- vector(mode="numeric",length=nfactors) for (i in 1: nfactors) {sign.max[i] <- sign(loadings[maxabs[i],i])} loadings <- loadings %*% diag(sign.max) } else { mini <- min(loadings) maxi <- max(loadings) if (abs(mini) > maxi) {loadings <- -loadings } loadings <- as.matrix(loadings) if(fm=="minres") {colnames(loadings) <- "mr1"} else {colnames(loadings) <- "PA1"} } #sign of largest loading is positive } #added January 5, 2009 to flip based upon colSums of loadings if (nfactors >1) {sign.tot <- vector(mode="numeric",length=nfactors) sign.tot <- sign(colSums(loadings)) loadings <- loadings %*% diag(sign.tot) } else { if (sum(loadings) <0) {loadings <- -as.matrix(loadings)} else {loadings <- as.matrix(loadings)} colnames(loadings) <- "PA1" } #end addition if(fm=="minres") {colnames(loadings) <- paste("MR",1:nfactors,sep='') } else {colnames(loadings) <- paste("PA",1:nfactors,sep='')} rownames(loadings) <- rownames(r) loadings[loadings==0.0] <- 10^-15 #added to stop a problem with varimax if loadings are exactly 0 model <- loadings %*% t(loadings) #f.loadings <- loadings #used to pass them to factor.stats Phi <- NULL if(rotate != "none") {if (nfactors > 1) { if (rotate=="varimax" | rotate=="quartimax") { rotated <- do.call(rotate,list(loadings)) loadings <- rotated$loadings Phi <- NULL} else { if ((rotate=="promax")|(rotate=="Promax")) {pro <- Promax(loadings) loadings <- pro$loadings Phi <- pro$Phi} else { if (rotate == "cluster") {loadings <- varimax(loadings)$loadings pro <- target.rot(loadings) loadings <- pro$loadings Phi <- pro$Phi} else { if (rotate =="oblimin"| rotate=="quartimin" | rotate== "simplimax") { if (!requireNamespace('GPArotation')) {warning("I am sorry, to do these rotations requires the GPArotation package to be installed") Phi <- NULL} else { ob <- do.call(rotate,list(loadings) ) loadings <- ob$loadings Phi <- ob$Phi} } }}} }} if(nfactors >1) { ev.rotated <- diag(t(loadings) %*% loadings) ev.order <- order(ev.rotated,decreasing=TRUE) loadings <- loadings[,ev.order]} rownames(loadings) <- colnames(r) if(!is.null(Phi)) {Phi <- Phi[ev.order,ev.order] } #January 20, 2009 but, then, we also need to change the order of the rotation matrix! class(loadings) <- "loadings" if(nfactors < 1) nfactors <- n result <- factor.stats(r,loadings,Phi,n.obs) #do stats as a subroutine common to several functions result$rotate <- rotate result$loadings <- loadings result$values <- eigens$values result$communality <- round(diag(model),digits) result$uniquenesses <- round(diag(r-model),digits) if(!is.null(Phi)) {result$Phi <- Phi} if(fm=="pa") result$communality.iterations <- round(unlist(comm.list),digits) if(scores) {result$scores <- factor.scores(x.matrix,loadings) } result$factors <- nfactors result$fn <- "factor.pa" result$fm <- fm result$Call <- cl class(result) <- c("psych", "fa") return(result) } #modified October 30, 2008 to sort the rotated loadings matrix by the eigen values. psych/R/print.psych.mediate.R0000644000176200001440000001765114444432447015640 0ustar liggesusers"print_psych.mediate" <- function(x,digits=2,short=TRUE) { cat("\nMediation/Moderation Analysis \nCall: ") print(x$Call) dv <- x$var.names[["DV"]] # iv <- x$var.names[["IV"]] mv <- x$var.names[["med"]] mod <- x$var.names[["mod"]] # dv <- x$names[1] iv <- rownames(x$direct) if(iv[1] == "Intercept") iv <- iv[-1] niv <- length(iv) nmed <- length(mv) ndv <- length(dv) nz <- length(x$var.names[["z"]]) # if(dim(x$a)) {mv <- names(x$a)} else {mv <- colnames(x$a) cat("\nThe DV (Y) was ", dv,". The IV (X) was ", iv,". The mediating variable(s) = ", mv,".") if(!is.null(x$mod)) cat(" The moderating variable(s) = ",mod) if(!is.null(x$var.names$z)) cat(" Variable(s) partialled out were", x$var.names[["z"]]) if(!is.null(mv)) { for(j in 1:ndv) { for(i in 1:niv) { cat("\n\nTotal effect(c) of ",iv[i], " on ", dv[j]," = ",round(x$direct[i+1,j],digits), " S.E. = ", round(x$total.reg$se[i+1,j],digits), " t = ",round(x$total.reg$t[i+1,j],digits)," df= ",x$total.reg$df, " with p = ", signif(x$total.reg$prob[i+1,j],digits)) cat("\nDirect effect (c') of ",iv[i], " on ", dv[j]," removing ", mv ," = ",round(x$cprime.reg$beta[i+1,j],digits), " S.E. = ", round(x$cprime.reg$se[i+1,j],digits), " t = ",round(x$cprime.reg$t[i+1,j],digits), " df= ", x$cprime.reg$df, " with p = ", signif(x$cprime.reg$prob[i+1,j],digits)) if(is.null(x$mod)) { cat("\nIndirect effect (ab) of ",iv[i], " on ", dv[j]," through " ,mv , " = ", round(x$ab[i,j],digits),"\n") cat("Mean bootstrapped indirect effect = ",round(x$boot$mean[i],digits), " with standard error = ",round(x$boot$sd[i],digits), " Lower CI = ",round(x$boot$ci[1,i],digits), " Upper CI = ", round(x$boot$ci[2,i],digits))} } F <- x$cprime.reg$df * x$cprime.reg$R2[j]/(((nrow(x$cprime.reg$beta)-1) * (1-x$cprime.reg$R2[j]))) pF <- -expm1(pf(F,nrow(x$cprime.reg$beta),x$cprime.reg$df,log.p=TRUE)) cat("\nR =", round(sqrt(x$cprime.reg$R2[j]),digits),"R2 =", round(x$cprime.reg$R2[j],digits), " F =", round(F,digits), "on",nrow(x$cprime.reg$beta)-1, "and", x$cprime.reg$df,"DF p-value: ",signif(pF,digits+1), "\n") } if(short) {cat("\n To see the longer output, specify short = FALSE in the print statement or ask for the summary")} else { if(is.null(x$mod)) { cat("\n\n Full output \n") summary_psych.mediate(x) if(FALSE) { cat("\nDirect effect estimates (traditional regression) (c') \n") for(j in 1:ndv) { if (niv==1) { dfd <- round(data.frame(direct=x$cprime.reg$beta[,j],se = x$cprime.reg$se[,j],t=x$cprime.reg$t[,j],df=x$cprime.reg$df),digits) dfdp <- cbind(dfd,p=signif(x$cprime.reg$prob[,j],digits=digits+1)) } else { dfd <- round(data.frame(direct=x$cprime.reg$beta[1:(niv+1+nmed),j],se = x$cprime.reg$se[1:(niv+1+nmed),j],t=x$cprime.reg$t[1:(niv+1+nmed),j],df=x$cprime.reg$df),digits) dfdp <- cbind(dfd,p=signif(x$cprime.reg$prob[1:(niv+1+nmed),j],digits=digits+1)) } colnames(dfdp) <- c(dv[j],"se","t","df","Prob") print(dfdp) F <- x$cprime.reg$df * x$cprime.reg$R2[j]/(((nrow(x$cprime.reg$beta)-1) * (1-x$cprime.reg$R2[j]))) pF <- -expm1(pf(F,nrow(x$cprime.reg$beta)-1,x$cprime.reg$df,log.p=TRUE)) cat("\nR =", round(sqrt(x$cprime.reg$R2[j]),digits),"R2 =", round(x$cprime.reg$R2[j],digits), " F =", round(F,digits), "on",nrow(x$cprime.reg$beta)-1, "and", x$cprime.reg$df,"DF p-value: ",signif(pF,digits+1), "\n") } cat("\n Total effect estimates (c) \n") for(j in 1:ndv) { dft <- round(data.frame(direct=x$total.reg$beta[,j],se = x$total.reg$se[,j],t=x$total.reg$t[,j],df=x$total.reg$df),digits) dftp <- cbind(dft,p = signif(x$total.reg$prob[,j],digits=digits+1)) colnames(dftp) <- c(dv[j],"se","t","df","Prob") rownames(dftp) <- rownames(x$total.reg$beta) print(dftp) } cat("\n 'a' effect estimates \n") if(niv==1) { dfa <- round(data.frame(a = x$a.reg$beta[1,1:nmed],se = x$a.reg$se[1,1:nmed],t = x$a.reg$t[1,1:nmed],df= x$a.reg$df),digits) dfa <- cbind(dfa,p=signif(x$a.reg$prob[1,1:nmed],digits=digits+1)) if(NROW(dfa) ==1) {rownames(dfa) <- rownames(x$a.reg$beta)[-1] colnames(dfa) <- c(colnames(x$a.reg$beta),"se","t","df", "Prob")} else { rownames(dfa) <- colnames(x$a.reg$beta) colnames(dfa) <- c(rownames(x$a.reg$beta),"se","t","df", "Prob")} print(dfa)} else { for (i in 1:nmed) { dfa <- round(data.frame(a = x$a.reg$beta[,i],se = x$a.reg$se[,i],t = x$a.reg$t[,i],df= x$a.reg$df),digits) dfa <- cbind(dfa,p=signif(x$a.reg$prob[,i],digits=digits+1)) rownames(dfa) <-rownames(x$a.reg$beta) colnames(dfa) <- c(colnames(x$a.reg$beta)[i],"se","t","df","Prob") print(dfa) } } cat("\n 'b' effect estimates \n") for (j in 1:ndv) { if(niv==1) { dfb <- round(data.frame(direct=x$b.reg$beta[-(1:niv),j],se = x$b.reg$se[-(1:niv),j],t=x$b.reg$t[-(1:niv),j], df=x$b.reg$df),digits) dfb <- cbind(dfb,p=signif(x$b.reg$prob[-(1:niv),j],digits=digits+1))} else { dfb <- round(data.frame(direct=x$b.reg$beta[-(1:niv),j],se = x$b.reg$se[-(1:niv),j],t=x$b.reg$t[-(1:niv),j],df=x$b.reg$df),digits) dfb <- cbind(dfb,p=signif(x$b.reg$prob[-(1:niv),j],digits=digits+1))} rownames(dfb) <- rownames(x$b.reg$beta)[-(1:niv)] colnames(dfb) <- c(dv[j],"se","t","df", "Prob") print(dfb) } cat("\n 'ab' effect estimates \n") for (j in 1:ndv) { dfab <-round(data.frame(indirect = x$ab[,j],boot = x$boot$mean[,j],sd=x$boot$sd[,j], lower=x$boot$ci[1,1:niv], upper=x$boot$ci[2,1:niv]),digits) rownames(dfab) <- rownames(x$ab) colnames(dfab)[1] <- dv[j] print(round(dfab,digits)) } if(nmed > 1) { cat("\n 'ab' effects estimates for each mediator \n") for (j in 1:nmed) { dfab <-round(data.frame(indirect = x$all.ab[,j],boot = x$boot$mean[,j+ndv],sd=x$boot$sd[,j+ndv], lower=x$boot$ci[1,(j*niv +1):(j*niv +niv)], upper=x$boot$ci[2,(j*niv +1):(j*niv +niv)]),digits) rownames(dfab) <- rownames(x$ab) colnames(dfab)[1] <- mv[j] print(round(dfab,digits)) } } } } else { cat("\n\nEffect of interaction of ",iv[1], " with ", iv[2] , " = ", round(x$direct[3],digits)," S.E. = ", round(x$direct.reg$se[3,1],digits), " t = ",round(x$direct.reg$t[3,1],digits), " with p = ", signif(x$direct.reg$prob[3,1],digits)) cat("\nIndirect effect due to interaction of ",iv[1], " with ", iv[2] , " = ", round(x$indirect,digits)) cat("\nMean bootstrapped indirect interaction effect = ",round(x$boot$mean[1],digits), " with standard error = ",round(x$boot$sd[1],digits), " Lower CI = ",round(x$boot$ci.ab[1],digits), " Upper CI = ", round(x$boot$ci.ab[2,i],digits)) cat("\nSummary of a, b, and ab estimates and ab confidence intervals\n") } } } else {#This is a pure regression model, just show it summary(x) # for(i in 1:ndv) {cat("\n DV = ",colnames(x$total.reg$beta)[i], "\n") # result.df <- data.frame( round(x$total.reg$beta[,i],digits),round(x$total.reg$se[,i],digits),round(x$total.reg$t[,i],digits),signif(x$total.reg$prob[,i],digits)) # colnames(result.df) <- c("slope","se", "t", "p") # print(result.df) # # cat("\nWith R2 = ", round(x$cpime.reg$R2[i], digits)) # F <- x$cprime.reg$df * x$cprime.reg$R2[i]/(((nrow(x$cprime.reg$beta)-1) * (1-x$cprime.reg$R2[i]))) # pF <- -expm1(pf(F,nrow(x$cprime.reg$beta),x$cprime.reg$df,log.p=TRUE)) # cat("\nR =", round(sqrt(x$cprime.reg$R2[i]),digits),"R2 =", round(x$cprime.reg$R2[i],digits), " F =", round(F,digits), "on",nrow(x$cprime.reg$beta)-1, "and", x$total.reg$df,"DF p-value: ",signif(pF,digits+1), "\n") # } } psych/R/fa.poly.R0000644000176200001440000001212313761465351013306 0ustar liggesusers #polychoric factor analysis with confidence intervals "fa.poly" <- function(x,nfactors=1,n.obs = NA,n.iter=1,rotate="oblimin",SMC=TRUE,missing=FALSE,impute="median", min.err = .001,max.iter=50,symmetric=TRUE,warnings=TRUE,fm="minres",alpha=.1, p =.05,scores="regression",oblique.scores=TRUE,weight=NULL,global =TRUE, ...) { cl <- match.call() ncat <- 8 n.obs <- dim(x)[1] tx <- table(as.matrix(x)) if(dim(tx)[1] ==2) {tet <- tetrachoric(x) typ = "tet"} else { tab <- apply(x,2,function(x) table(x)) if(is.list(tab)) {len <- lapply(tab,function(x) length(x))} else {len <- dim(tab)[1] } nvar <- ncol(x) dvars <- subset(1:nvar,len==2) #find the dichotomous variables pvars <- subset(1:nvar,((len > 2) & (len <= ncat))) #find the polytomous variables cvars <- subset(1:nvar,(len > ncat)) #find the continuous variables (more than ncat levels) if(length(pvars)==ncol(x)) {tet <- polychoric(x,weight=weight,global=global) typ = "poly"} else {tet <- mixed.cor(x,weight=weight,global=global) typ="mixed" }} r <- tet$rho #call fa with the polychoric/tetrachoric matrix #fa will not return scores, we still need to find them f <- fa(r,nfactors=nfactors,n.obs=n.obs,rotate=rotate,SMC = SMC,missing=FALSE,impute=impute,min.err=min.err,max.iter=max.iter,symmetric=symmetric,warnings=warnings,fm=fm,alpha=alpha,scores=scores,oblique.scores=oblique.scores,...) #call fa with the appropriate parameters f$Call <- cl fl <- f$loadings #this is the original nvar <- dim(fl)[1] if(n.iter > 1) { e.values <- list(pc =vector("list",n.iter),fa =vector("list",n.iter)) replicates <- vector("list",n.iter) rep.rots <- vector("list",n.iter) for (trials in 1:n.iter) { xs <- x[sample(n.obs,n.obs,replace=TRUE),] if(typ!= "tet") {tets <- mixedCor(xs,weight=weight,global=global)} else {tets <- tetrachoric(xs,weight=weight)} r <- tets$rho values.samp <- eigen(tets$rho)$values e.values[["pc"]][[trials]] <- values.samp fs <- fa(r,nfactors=nfactors,rotate=rotate,SMC = SMC,missing=FALSE,impute=impute,min.err=min.err,max.iter=max.iter,symmetric=symmetric,warnings=warnings,fm=fm,alpha=alpha,...) #call fa with the appropriate parameters e.values[["fa"]][[trials]] <- fs$values if(nfactors > 1) {t.rot <- target.rot(fs$loadings,fl) replicates[[trials]] <- t.rot$loadings if(!is.null(fs$Phi)) { phis <- fs$Phi # should we rotate the simulated factor correlations? rep.rots[[trials]] <- phis[lower.tri(phis)]}} else { replicates[[trials]] <- fs$loadings} } replicates <- matrix(unlist(replicates),ncol=nfactors*nvar,byrow=TRUE) if(!is.null( f$Phi) ) {rep.rots <- matrix(unlist(rep.rots),ncol=nfactors*(nfactors-1)/2,byrow=TRUE) z.rot <- fisherz(rep.rots) means.rot <- colMeans(z.rot,na.rm=TRUE) sds.rot <- apply(z.rot,2,sd, na.rm=TRUE) sds.rot <- fisherz2r(sds.rot) ci.rot.lower <- means.rot + qnorm(p/2) * sds.rot ci.rot.upper <- means.rot + qnorm(1-p/2) * sds.rot means.rot <- fisherz2r(means.rot) ci.rot.lower <- fisherz2r(ci.rot.lower) ci.rot.upper <- fisherz2r(ci.rot.upper) ci.rot <- data.frame(lower=ci.rot.lower,upper=ci.rot.upper) } else {rep.rots <- NULL means.rot <- NULL sds.rot <- NULL z.rot <- NULL ci.rot <- NULL } z.replicates <- fisherz(replicates) #convert to z scores means <- matrix(colMeans(z.replicates,na.rm=TRUE),ncol=nfactors) sds <- matrix(apply(z.replicates,2,sd,na.rm=TRUE),ncol=nfactors) ci.lower <- means + qnorm(p/2) * sds ci.upper <- means + qnorm(1-p/2) * sds means <- fisherz2r(means) sds <- fisherz2r(sds) ci.lower <- fisherz2r(ci.lower) ci.upper <- fisherz2r(ci.upper) #ci.low.e <- apply(replicates,2, quantile, p/2) #ci.up.e <- apply(replicates,2,quantile, (1-p/2)) #ci <- data.frame(lower = ci.lower, upper=ci.upper, low.e=ci.low.e, up.e=ci.up.e) ci <- data.frame(lower = ci.lower,upper=ci.upper) class(means) <- "loadings" #class(sds) <- "loadings" colnames(means) <- colnames(sds) <- colnames(fl) rownames(means) <- rownames(sds) <- rownames(fl) ei.pc <-describe(matrix(unlist(e.values$pc),ncol=nvar,byrow=TRUE)) #eigen values of pcs ei.fa <- describe(matrix(unlist(e.values$fa),ncol=nvar,byrow=TRUE)) #eigen values of fa e.stats <- list(ob.fa=f$values,ob.pc=f$e.values,pc=ei.pc,fa=ei.fa) results <- list(fa = f,rho=tet$rho,tau=tet$tau,n.obs=n.obs,means = means,sds = sds,ci = ci, means.rot=means.rot,sds.rot=sds.rot,ci.rot=ci.rot,Call= cl,replicates=replicates,rep.rots=rep.rots,e.values=e.values,e.stats=e.stats) class(results) <- c("psych","fa.ci") } else {results <- list(fa = f,rho=r,tau=tet$tau,n.obs=n.obs,Call=cl) if(oblique.scores) {results$scores <- factor.scores(x,f=f$loadings,Phi=f$Phi,method=scores,rho=r) } else {results$scores <- factor.scores(x,f=f$Structure,method=scores,rho=r)} class(results) <- c("psych","fa") } return(results) } #written May 3 2011 #revised Sept 13, 2013 to allow for mixed cor input #and to find factor scores if data are given #corrected Sept 20, 2013 to do the ci on the fisher zs and then convert back to r psych/R/factor.stats.R0000644000176200001440000004154714472205252014355 0ustar liggesusers#added the coarse option (defaulting to TRUE) 8/25/23 "factor.stats" <- function(r=NULL,f,phi=NULL,n.obs=NA,np.obs=NULL,alpha=.1,fm=NULL,smooth=TRUE, coarse=TRUE) { fa.stats(r=r,f=f,phi=phi,n.obs=n.obs,np.obs=np.obs,alpha=alpha,fm=fm,smooth=smooth,coarse=coarse)} "fa.stats" <- function(r=NULL,f,phi=NULL,n.obs=NA,np.obs=NULL,alpha=.05,fm=NULL,smooth=TRUE,coarse=TRUE) { #revised June 21, 2010 to add RMSEA etc. #revised August 25, 2011 to add cor.smooth for smoothing #revised November 10, 2012 to add stats for the minchi option of factoring #revised February 28, 2014 to emphasize empirical chi 2 and report empirical BIC #revised March 9, 2015 to report NA if RMSEA values are not in the confidence intervals cl <- match.call() conf.level <- alpha if((!is.matrix(f)) && (!is.data.frame(f))) {#do a number of things that use f as list if(is.null(r) && (!is.null(f$r)) ) r <- f$r #we found the correlation while factoring #if(is.na(n.obs) && (!is.null(f$np.obs))) {np.obs <- f$np.obs} f <- as.matrix(f$loadings)} else {f <- as.matrix(f)} n <- dim(r)[2] #number of variables if(dim(r)[1] !=n ) {n.obs = dim(r)[1] r <- cor(r,use="pairwise") } if(is.data.frame(r)) r <- as.matrix(r) nfactors <- dim(f)[2] # number of factors if(is.null(phi)) {model <- f %*% t(f)} else {model <- f %*% phi %*% t(f)} residual<- r - model r2 <- sum(r*r) rstar2 <- sum(residual*residual) result <- list(residual = residual) result$dof <- dof <- n * (n-1)/2 - n * nfactors + (nfactors *(nfactors-1)/2) #r2.off <- r #diag(r2.off) <- 0 # r2.off <- sum(r2.off^2) r2.off <- r2 - tr(r) diag(residual) <- 0 if(is.null(np.obs)) {rstar.off <- sum(residual^2) result$ENull <- r2.off * n.obs #the empirical null model result$chi <- rstar.off * n.obs #this is the empirical chi square result$rms <- sqrt(rstar.off/(n*(n-1))) #this is the empirical rmsea result$nh <- n.obs if (result$dof > 0) {result$EPVAL <- pchisq(result$chi, result$dof, lower.tail = FALSE) result$crms <- sqrt(rstar.off/(2*result$dof) ) result$EBIC <- result$chi - result$dof * log(n.obs) result$ESABIC <- result$chi - result$dof * log((n.obs+2)/24) } else {result$EPVAL <- NA result$crms <- NA result$EBIC <- NA result$ESABIC <- NA} } else { rstar.off <- sum(residual^2 * np.obs) #weight the residuals by their sample size r2.off <-(r*r * np.obs) #weight the original by sample size r2.off <- sum(r2.off) -tr(r2.off) result$chi <- rstar.off #this is the sample size weighted chi square result$nh <- harmonic.mean(as.vector(np.obs)) #this is the sample weighted cell size result$rms <- sqrt(rstar.off/(result$nh*n*(n-1))) #this is the sample size weighted square root average squared residual if (result$dof > 0) {result$EPVAL <- pchisq(result$chi, result$dof, lower.tail = FALSE) result$crms <- sqrt(rstar.off/(2*result$nh*result$dof) ) result$EBIC <- result$chi - result$dof * log(result$nh) result$ESABIC <- result$chi - result$dof * log((result$nh+2)/24) } else { #added 2/28/2014 result$EPVAL <- NA result$crms <- NA result$EBIC <- NA result$ESABIC <- NA } } result$fit <-1-rstar2/r2 result$fit.off <- 1-rstar.off/r2.off result$sd <- sd(as.vector(residual)) #this is the none sample size weighted root mean square residual result$factors <- nfactors result$complexity <- (apply(f,1,function(x) sum(x^2)))^2/apply(f,1,function(x)sum(x^4)) diag(model) <- diag(r) model <- cor.smooth(model) #this replaces the next few lines with a slightly cleaner approach if(smooth) {r <- cor.smooth(r) } #this makes sure that the correlation is positive semi-definite #although it would seem that the model should always be positive semidefinite so this is probably not necessary #cor.smooth approach added August 25,2011 # } m.inv.r <- try(solve(model,r),silent=TRUE) #modified Oct 30, 2009 to perhaps increase precision -- #modified 2015/1/2 to use try if(inherits(m.inv.r,"try-error")) {warning("the model inverse times the r matrix is singular, replaced with Identity matrix which means fits are wrong") m.inv.r <- diag(1,n,n)} if(is.na(n.obs)) {result$n.obs=NA result$PVAL=NA} else {result$n.obs=n.obs} result$dof <- n * (n-1)/2 - n * nfactors + (nfactors *(nfactors-1)/2) result$objective <- sum(diag((m.inv.r))) - log(det(m.inv.r)) -n #this is what Tucker Lewis call F if(is.infinite(result$objective)) {result$objective <- rstar2 message("The determinant of the smoothed correlation was zero.\nThis means the objective function is not defined.\nChi square is based upon observed residuals.")} result$criteria <- c("objective"=result$objective,NA,NA) if (!is.na(n.obs)) {result$STATISTIC <- chisq <- result$objective * ((n.obs-1) -(2 * n + 5)/6 -(2*nfactors)/3) #from Tucker and from factanal # if (!is.na(n.obs)) {result$STATISTIC <- chisq <- result$objective * ((n.obs-1)) #from Fox and sem if(!is.nan(result$STATISTIC)) if (result$STATISTIC <0) {result$STATISTIC <- 0} if (result$dof > 0) {result$PVAL <- pchisq(result$STATISTIC, result$dof, lower.tail = FALSE)} else {result$PVAL <- NA} } result$Call <- cl #find the Tucker Lewis Index of reliability #Also known as the NNFI which is expressed in terms of Chisq #NNFI <- (chisqNull/dfNull - chisq/df)/(chisqNull/dfNull - 1) #first find the null model F0 <- sum(diag((r))) - log(det(r)) -n if(is.infinite(F0)) {F0 <- r2 message("The determinant of the smoothed correlation was zero.\nThis means the objective function is not defined for the null model either.\nThe Chi square is thus based upon observed correlations.")} Fm <- result$objective #objective function of model Mm <- Fm/( n * (n-1)/2 - n * nfactors + (nfactors *(nfactors-1)/2)) M0 <- F0* 2 /(n*(n-1)) nm <- ((n.obs-1) -(2 * n + 5)/6 -(2*nfactors)/3) # result$null.model <- F0 result$null.dof <- n * (n-1) /2 if (!is.na(n.obs)) {result$null.chisq <- F0 * ((n.obs-1) -(2 * n + 5)/6 ) result$TLI <- (M0 - Mm)/(M0 - 1/nm) #NNFI in Fox's sem if(is.numeric(result$TLI) & !is.nan(result$TLI) & (result$TLI >1)) result$F0 <-1 #The estimatation of RMSEA and the upper and lower bounds are taken from John Fox's summary.sem with minor modifications if(!is.null(result$objective) && (result$dof >0) &&(!is.na(result$objective))) { # RMSEA <- sqrt(max(result$objective/result$dof - 1/(n.obs-1), 0)) #this is x2/(df*N ) - 1/(N-1) #put back 4/21/17 #however, this is not quite right and should be RMSEA <- sqrt(max(chisq/(result$dof* n.obs) - 1/(n.obs-1), 0)) #this is x2/(df*N ) - 1/(N-1) #fixed 4/5/19 #note that the result$objective is not actually the chi square unless we adjust it ala Tucker #thus, the RMSEA was slightly off. This was fixed October 29, 2016 to be # RMSEA <- sqrt(max( (chisq/(result$dof * (n.obs))-1/(n.obs)),0)) #changed to this from above October 29, 2016 and then changed to N February 28, 2017 #Seem to have dropped the sqrt part of this at some point tail <- conf.level/2 #this had been incorrectly listed as (1-conf.level)/2 which gave extraordinarily narrow confidence boundaries, fixed August 25, 2011 N <- max <- n.obs df <- result$dof #chi.sq.statistic <- RMSEA^2 * df * (N - 1) + df #why isn't this just chi.sq? chi.sq.statistic <- chisq max <- max(n.obs,chi.sq.statistic) +2* n.obs #the alternative to this is to use the uniroot technique of Yves Rosseel in lavaan #### from Hao Wu # LB<-function(T){ # + if (pchisq(df=df,q=T)<=0.95) return(0) else # + sqrt(uniroot(function(x) {pchisq(df=df,ncp=x,q=T)-0.95},c(0,10000))$root/nstar/df) # + } # # > UB<-function(T){ # + if (pchisq(df=df,q=T)<=0.05) return(0) else # + sqrt(uniroot(function(x) {pchisq(df=df,ncp=x,q=T)-0.05},c(0,10000))$root/nstar/df) # + } ## #Finally implement February 2017 # upperlambda <- function(lam) {tail - pchisq(chi.sq.statistic, df, ncp=lam)^2 } RMSEA.U <- 0 #in case we can not find it if(pchisq(df=result$dof,q=result$STATISTIC) > tail){ RMSEA.U <- try( sqrt(uniroot(function(x) {pchisq(df=result$dof,ncp=x,q=result$STATISTIC)- tail},c(0,max))$root/(n.obs-1)/result$dof),silent=TRUE) if(inherits( RMSEA.U,"try-error")) {if(RMSEA <= 0 ) {RMSEA.U <- 0} else {message("In factor.stats, I could not find the RMSEA upper bound . Sorry about that") #if the fit is super good, then the chisq is too small to get an upper bound. Report it as 0. RMSEA.U <- NA}} } # lam.U <- NA} else {lam.U <- res} # # if (is.null(res) || is.na(res$objective) || res$objective < 0){ # # max <- 0 # # warning("cannot find upper bound of RMSEA") # # break # # } # # lowerlambda <- function(lam) {1- tail - pchisq(chi.sq.statistic, df, ncp=lam)^2 } RMSEA.L <- 0 #in case we can not find it if(pchisq(df=result$dof,q=result$STATISTIC) > (1-tail)) { RMSEA.L <- try( sqrt(uniroot(function(x) {pchisq(df=result$dof,ncp=x,q=result$STATISTIC)-1 + tail},c(0,max))$root/(n.obs-1)/result$dof) ,silent=TRUE) if(inherits(RMSEA.L,"try-error")) {#message("In factor.stats, I could not find the RMSEA lower bound . Sorry about that") RMSEA.L <- NA} } else {RMSEA.L <- 0} # lam.L <- 0} else {lam.L <- res} # # if (is.null(res) || is.na(res$objective) || res$objective < 0){ # # max <- 0 # # warning("cannot find lower bound of RMSEA") # # break # # } #However, this was giving the wrong results and so I implemented the following #suggested by Hao Wu April, 2017 #RMSEA.U <- sqrt(uniroot(function(x) {pchisq(df=result$dof,ncp=x,q=result$STATISTIC)- alpha},c(0,10000))$root/(n.obs-1)/result$dof) #RMSEA.L <- sqrt(uniroot(function(x) {pchisq(df=result$dof,ncp=x,q=result$STATISTIC)-1 + alpha},c(0,10000))$root/(n.obs-1)/result$dof) # while (max > 1){ # res <- try(optimize(function(lam) (tail - pchisq(chi.sq.statistic, df, ncp=lam))^2, interval=c(0, max)),silent=TRUE) # if(class(res)=="try-error") {message("In factor.stats, I could not find the RMSEA upper bound . Sorry about that") # res <- NULL} # if (is.null(res) || is.na(res$objective) || res$objective < 0){ # max <- 0 # warning("cannot find upper bound of RMSEA") # break # } # if (sqrt(res$objective) < tail/100) break # max <- max/2 # } # lam.U <- if (max <= 1) NA else res$minimum # # max <- max(max,lam.U) # max <- lam.U # if(is.na(max)) max <- N # while (max > 1){# this just iterates in to get a value # res <- try(optimize(function(lam) (1 - tail - pchisq(chi.sq.statistic, df, ncp=lam))^2, interval=c(0, max)),silent=TRUE) # if(class(res)=="try-error") {message("In factor.stats, I could not find the RMSEA lower bound. Sorry about that") # res <- NULL} # if (is.null(res)) {break} # if (sqrt(res$objective) < tail/100) break # max <- max/2 # if (is.na(res$objective) || res$objective < 0){ # max <- 0 # warning("cannot find lower bound of RMSEA") # break # } # } # # # lam.L <- if (max <= 1) NA else res$minimum #lam is the ncp # this RMSEA calculation is probably not right because it will sometimes (but rarely) give cis that don't include the estimate # RMSEA.U <- sqrt(lam.U/((N)*df) ) #lavaan uses sqrt(lam.U/((N)*df) ) sem uses sqrt(lam.U/((N-1)*df) ) # RMSEA.L <- min(sqrt(lam.L/((N)*df) ),RMSEA) if(!is.na(RMSEA.U) && RMSEA.U < RMSEA) RMSEA.U <- NA if(!is.na(RMSEA.L) && RMSEA.L > RMSEA) RMSEA.L <- NA result$RMSEA <- c(RMSEA, RMSEA.L, RMSEA.U, 1-conf.level) names(result$RMSEA) <- c("RMSEA","lower","upper","confidence") result$BIC <- chisq - df * log(N) result$SABIC <- chisq - df * log((N+2)/24) # added 1/27/2014 } } #now, find the correlations of the factor scores, even if not estimated, with the factors #this repeats what was done in factor.scores and does not take in\to account the options in factor.scores if(!is.null(phi)) f <- f %*% phi #convert the pattern to structure coefficients if(smooth) {r <- cor.smooth(r)} # w <- try(solve(r,f) ,silent=TRUE) #these are the regression factor weights w <- Pinv(r)%*%f #use the Pseudo inverse #added 4/22/23 if(inherits(w,"try-error")) {message("In factor.stats, the correlation matrix is singular, an approximation is used") ev <- eigen(r) if(is.complex(ev$values)) {warning("complex eigen values detected by factor stats, results are suspect") } else { ev$values[ev$values < .Machine$double.eps] <- 100 * .Machine$double.eps r <- ev$vectors %*% diag(ev$values) %*% t(ev$vectors) diag(r) <- 1 # w <- try(solve(r,f) ,silent=TRUE) w <- Pinv(r)%*% r #use the Pseudo inverse #these are the factor weights if(inherits(w,"try-error")) {warning("In factor.stats, the correlation matrix is singular, and we could not calculate the beta weights for factor score estimates") w <- diag(1,dim(r)[1]) } #these are the beta weights }} R2 <- diag(t(w) %*% f) #but, we actually already found this in factor scores -- these are the Thurstone values if(is.null(fm)) { if(prod(R2,na.rm=TRUE) < 0 ) {message("In factor.stats: The factor scoring weights matrix is probably singular -- Factor score estimate results are likely incorrect.\n Try a different factor score estimation method\n") R2[abs(R2) > 1] <- NA R2[R2 <= 0] <- NA } if ((max(R2,na.rm=TRUE) > (1 + .Machine$double.eps)) ) {warning("The estimated weights for the factor scores are probably incorrect. Try a different factor score estimation method.")} } r.scores <- cov2cor(t(w) %*% r %*% w) result$r.scores <- r.scores result$R2 <- R2 #this is the multiple R2 of the scores with the factors # result$R2.corrected <- factor.indeterm(r,f) # result$R2.total <- R2.cor$R2 # result$beta.total <- R2.cor$beta.total #coarse coding if(coarse){ keys <- factor2cluster(f) #added the not coarse option 8/25/23 covar <- t(keys) %*% r %*% keys if((nfactors >1) && (dim(covar)[2] >1 )) { sd.inv <- diag(1/sqrt(diag(covar))) cluster.correl <- sd.inv %*% covar %*% sd.inv #this is just cov2cor(covar) valid <- t(f) %*% keys %*% sd.inv result$valid <- diag(valid) if(NCOL(cluster.correl) < NCOL(r.scores)) {#we need to pad out the cluster.correl matrix so that fa.organize does not choke 9/2/20 temp <- cluster.correl n.temp <- NROW(temp) cluster.correl <- matrix(NA,ncol(r.scores),nrow(r.scores)) cluster.correl[1:n.temp,1:n.temp] <- temp } result$score.cor <- cluster.correl} else {sd.inv <- 1/sqrt(covar) if(dim(sd.inv)[1] == 1) sd.inv <- diag(sd.inv) valid <- try(t(f) %*% keys * sd.inv) result$valid <- valid} } else { result$score.cor <-NULL result$valid <- NULL} result$weights <- w #the beta weights for factor scores class(result) <- c("psych","stats") return(result) } psych/R/matrix.addition.R0000644000176200001440000000145412767251554015044 0ustar liggesusers#matrix.addition "%+%" <- function(x,y) { if(!is.matrix(x)) { if(is.vector(x)) {x <- as.matrix(x)} else stop("x must be either a vector or a matrix")} if(!is.matrix(y)) { if(is.vector(y)) {y <- as.matrix(y)} else stop("y must be either a vector or a matrix")} n.x <- dim(x)[1] n.y <- dim(y)[2] n.k <- dim(x)[2] if (n.k != dim(y)[1]) {warning("Matrices should be comparable")} #first find sum vectors x <- rowSums(x,na.rm=FALSE) y <- colSums(y,na.rm=FALSE) one <- as.vector(rep(1,n.y)) #to duplicate x n.y times one.y <- as.vector(rep(1,n.x)) #to duplicate y n.x times xy <- x %*% t(one) + t(y %*% t(one.y) ) #sum the vectors in a rectangular array return(xy) } "tr" <- function(m) { if (!is.matrix(m) |(dim(m)[1] != dim(m)[2]) ) stop ("m must be a square matrix") return(sum(diag(m),na.rm=TRUE)) }psych/R/imputedR.r0000644000176200001440000000142013663337540013564 0ustar liggesusers# A simple function to impute missing data correlations #we find the mean r for each row and column and replace the missing value with geometric mean of the row and column for the missing value #we can either find the colmean correlation or the median #May 7, 2020 imputeR <- function(r,mean=TRUE,z=TRUE){ diag(r) <- NA if(z){r <- fisherz(r)} if(mean) { rowR <- rowMeans(r,na.rm=TRUE) colR <- colMeans(r,na.rm=TRUE) } else { rowR <- apply(r,1,median,na.rm=TRUE) colR <- apply(r,2,median,na.rm=TRUE)} if(z) {rowR <- fisherz2r(rowR) colR <- fisherz2r(colR)} imputed <- (rowR %o% colR) #the outer product of the mean correlations diag(imputed) <- NA r [is.na(r)] <- sqrt(imputed[is.na(r)]) diag(r) <- 1 r [is.na(r) ] <- -sqrt(abs(imputed[is.na(r)])) return(r) } psych/R/circ.sim.plot.R0000644000176200001440000000201111134463500014377 0ustar liggesusers"circ.sim.plot" <- function(x.df) { with(x.df,{ symb <- c(21,22,20) colors <- c("black","blue", "red") op <- par(mfrow=c(2,2)) plot(c.gap,c.RT,xlim=c(0,.5),ylim=c(0,1),pch=symb[1],col=colors[1],xlab="Gap Test",ylab="Rotation Test",main="Gap x Rotation") points(s.gap,s.RT,pch=symb[2],col=colors[2]) points(e.gap,e.RT,pch=symb[3],col=colors[3]) plot(c.gap,c.fisher,xlim=c(0,.5),ylim=c(0,.50),pch=symb[1],col=colors[1],xlab="Gap Test",ylab="Fisher Test",main="Gap x Fisher") points(s.gap,s.fisher,pch=symb[2],col=colors[2]) points(e.gap,e.fisher,pch=symb[3],col=colors[3]) plot(c.fisher,c.RT,xlim=c(0,.5),ylim=c(0,1),pch=symb[1],col=colors[1],xlab="Fisher Test",ylab="Rotation Test",main="Fisher x Rotation") points(s.gap,s.RT,pch=symb[2],col=colors[2]) points(e.gap,e.RT,pch=symb[3],col=colors[3]) boxplot(x.df,main=" Box Plot of all tests") title(main = "Circumplex Tests for Circumplex, Ellipsoid, and Simple Structure",outer=TRUE,line=-1) }) #end of with op <- par(mfrow=c(1,1)) }psych/R/cor.ci.R0000644000176200001440000002031114363404053013100 0ustar liggesusers #Pearson or polychoric correlations with confidence intervals "cor.ci" <- function(x, keys = NULL, n.iter = 100, p = 0.05, overlap=FALSE, poly = FALSE, method = "pearson",plot=TRUE,minlength=5,n=NULL,...) { corCi(x=x, keys = keys, n.iter = n.iter, p = p, overlap=overlap, poly = poly, method = method,plot=plot,minlength=minlength,n=n,...) } "corCi" <- function(x,keys = NULL, n.iter = 100, p = 0.05, overlap=FALSE, poly = FALSE, method = "pearson",plot=TRUE,minlength=5,n = NULL,...) { cl <- match.call() n.obs <- dim(x)[1] if(!isCorrelation(x)) {#the normal case is to have data and find the correlations and then bootstrap them #added the direct from correlation matrix option, August 17, 2019 since I was finding them for statsBy if(is.null(keys) && overlap) overlap <- FALSE #can not correct for overlap with just items if(poly) { #find polychoric or tetrachoric correlations if desired ncat <- 8 nvar <- dim(x)[2] tx <- table(as.matrix(x)) if(dim(tx)[1] ==2) {tet <- tetrachoric(x) typ = "tet"} else { #should we do mixed correlations? tab <- apply(x,2,function(x) table(x)) if(is.list(tab)) {len <- lapply(tab,function(x) length(x))} else {len <- dim(tab)[1] } dvars <- subset(1:nvar,len==2) #find the dichotomous variables pvars <- subset(1:nvar,((len > 2) & (len <= ncat))) #find the polytomous variables cvars <- subset(1:nvar,(len > ncat)) #find the continuous variables (more than ncat levels) if(length(pvars)==ncol(x)) {tet <- polychoric(x) typ = "poly"} else {tet <- mixedCor(x) typ="mixed" } } Rho <- tet$rho #Rho is the big correlation of all of items } else { Rho <- cor(x,use="pairwise",method=method) #the normal Pearson correlations } #now, if there are keys, find the correlations of the scales if(!is.null(keys)) {bad <- FALSE if(!is.matrix(keys)) keys <- make.keys(x,keys) #handles the new normal way of just passing a keys list if(any(is.na(Rho))) {warning(sum(is.na(Rho)), " of the item correlations are NA and thus finding scales that include those items will not work.\n We will try to do it for those scales which do not include those items. \n Proceed with caution. ") bad <- TRUE rho <- apply(keys,2,function(x) colMeans(apply(keys,2,function(x) colMeans(Rho*x,na.rm=TRUE))*x,na.rm=TRUE)) #matrix multiplication without matrices! #switched to using colMeans instead of colSums, recognizing the problem of different number of items being dropped. } else { rho <- t(keys) %*% Rho %*% keys} } else {rho <- Rho} #find the covariances between the scales # ##correct for overlap if necessary on the original data if(overlap) { key.var <- diag(t(keys) %*% keys) var <- diag(rho) #these are the scale variances n.keys <- ncol(keys) key.av.r <- (var - key.var)/(key.var * (key.var-1)) item.cov <- t(keys) %*% Rho #this will blow up if there are bad data raw.cov <- item.cov %*% keys adj.cov <- raw.cov for (i in 1:(n.keys)) { for (j in 1:i) { adj.cov[i,j] <- adj.cov[j,i]<- raw.cov[i,j] - sum(keys[,i] * keys[,j] ) + sum(keys[,i] * keys[,j] * sqrt(key.av.r[i] * key.av.r[j])) } } diag(adj.cov) <- diag(raw.cov) rho <- cov2cor(adj.cov) } rho <- cov2cor(rho) #scale covariances to correlations nvar <- dim(rho)[2] if(n.iter > 1) { replicates <- list() rep.rots <- list() ##now replicate it to get confidence intervals #replicates <- lapply(1:n.iter,function(XX) { replicates <- mclapply(1:n.iter,function(XX) { xs <- x[sample(n.obs,n.obs,replace=TRUE),] {if(poly) { if(typ!= "tet") {tets <- mixedCor(xs)} else {tets <- tetrachoric(xs)} R <- tets$rho} else {R <- cor(xs,use="pairwise",method=method)} #R is the big correlation matrix if(!is.null(keys)) { if (bad) {covariances <- apply(keys,2,function(x) colMeans(apply(keys,2,function(x) colMeans(R*x,na.rm=TRUE))*x,na.rm=TRUE)) #matrix multiplication without matrices! } else { covariances <- t(keys) %*% R %*% keys} r <- cov2cor(covariances) } else {r <- R} #correct for overlap if this is requested if(overlap) { var <- diag(covariances) item.cov <- t(keys) %*% R raw.cov <- item.cov %*% keys adj.cov <- raw.cov key.av.r <- (var - key.var)/(key.var * (key.var-1)) for (i in 1:(n.keys)) { for (j in 1:i) { adj.cov[i,j] <- adj.cov[j,i]<- raw.cov[i,j] - sum(keys[,i] * keys[,j] ) + sum(keys[,i] * keys[,j] * sqrt(key.av.r[i] * key.av.r[j])) } } diag(adj.cov) <- diag(raw.cov) r <- cov2cor(adj.cov) #fixed 03/12/14 } rep.rots <- r[lower.tri(r)] } } ) } replicates <- matrix(unlist(replicates),ncol=nvar*(nvar-1)/2,byrow=TRUE) z.rot <- fisherz(replicates) means.rot <- colMeans(z.rot,na.rm=TRUE) sds.rot <- apply(z.rot,2,sd, na.rm=TRUE) sds.rot <- fisherz2r(sds.rot) ci.rot.lower <- means.rot + qnorm(p/2) * sds.rot #This is the normal value of the observed distribution ci.rot.upper <- means.rot + qnorm(1-p/2) * sds.rot means.rot <- fisherz2r(means.rot) ci.rot.lower <- fisherz2r(ci.rot.lower) ci.rot.upper <- fisherz2r(ci.rot.upper) low.e <- apply(replicates,2,quantile, p/2,na.rm=TRUE) up.e <- apply(replicates, 2, quantile, 1-p/2,na.rm=TRUE) tci <- abs(means.rot)/sds.rot ptci <- 1- pnorm(tci) #subtract from 1 (added 11/14/20) ci.rot <- data.frame(lower=ci.rot.lower,low.e=low.e,upper=ci.rot.upper,up.e=up.e,p =2*(ptci)) #dropped the 1-ptci 11/14/20 cnR <- abbreviate(colnames(rho),minlength=minlength) k <- 1 for(i in 1:(nvar-1)) {for (j in (i+1):nvar) { rownames(ci.rot)[k] <- paste(cnR[i],cnR[j],sep="-") k<- k +1 }} results <- list(rho=rho, means=means.rot,sds=sds.rot,tci=tci,ptci=ptci,ci=ci.rot,Call= cl,replicates=replicates) #if(plot) {cor.plot.upperLowerCi(results,numbers=TRUE,cuts=c(.001,.01,.05),...) } #automatically plot the results if(plot) {cor.plot(rho,numbers=TRUE,cuts=c(.001,.01,.05),pval = 2*(1-ptci),...) } class(results) <- c("psych","cor.ci") return(results) } else {#we have been given correlations, just find the cis. if(is.null(n)) {warning("\nFinding confidence intervals from a correlation matrix, but n is not specified, arbitrarily set to 1000") n <- 1000} results <- cor.Ci(x,n=n, alpha=p, minlength=minlength) results$ci <- results$r.ci results$r <- x class(results) <- cs(psych, corCi) return(results) } } #written Sept 20, 2013 #adapted from fa.poly #modified May 1, 2014 to scale by pvals #modified August 24, 2017 to include Bonferoni corrections from cor.test #modified January 22, 2023 to find correlations first if given raw data "corPlotUpperLowerCi" <- "cor.plot.upperLowerCi" <- function(R,numbers=TRUE,cuts=c(.001,.01,.05),select=NULL,main="Upper and lower confidence intervals of correlations",adjust=FALSE,...) { cor.cip <- NULL names <- cs(corCi, cor.cip, corr.test ,cor.ci) value <- inherits(R,names,which=TRUE) # value <- class(x)[2] if(max(value)==0) {#find the correlations first if it is not a correlation matrix if(isCorrelation(R)) {R <- corCi(R)} else {if(NROW(R)!= NCOL(R)) {R <-corr.test(R)} # } else { browser() # stop("I am stopping because the input is incorrect \ndata must either be a raw data matrix or the output of corr.test.")} }} if(adjust) {lower <- R$ci.adj$lower.adj upper <- R$ci.adj$upper.adj} else { lower <- R$ci$lower upper <- R$ci$upper} temp <- lower if(is.null(R$r)) {cn = colnames(R$rho) rl <- R$rho[lower.tri(R$rho)]} else { cn = colnames(R$r) rl <- R$r[lower.tri(R$r)]} #is input from cor.ci or corr.test lower[rl < 0 ] <- upper[rl < 0] upper[rl < 0] <- temp[rl < 0] m <- length(lower) n <- floor((sqrt(1 + 8 * m) +1)/2) X <- diag(n) X[lower.tri(X)] <- upper X <- t(X) X[lower.tri(X)] <- lower diag(X) <- 1 colnames(X) <- rownames(X) <- cn if(is.null(R$ptci)) {pval <- R$p} else {pval = 2*(1-R$ptci)} corPlot(X,numbers=numbers,pval=pval,cuts=cuts,select=select,main=main,...) class(X) <- c("psych","cor.cip") colnames(X) <- abbreviate(rownames(X,4)) invisible(X) } psych/R/dia.arrow.r0000644000176200001440000000544414463753412013672 0ustar liggesusers"dia.arrow" <- function (from, to, labels = NULL, scale = 1, cex = 1, adj = 2, both = FALSE, pos = NULL, l.cex, gap.size = NULL, draw = TRUE, col = "black", lty = "solid", ...) { if (missing(gap.size)) gap.size <- 0.2 if (missing(l.cex)) l.cex <- cex text.values <- NULL radius1 <- radius2 <- 0 if (!is.list(to)) { tocenter <- to } else { tocenter <- to$center } if (is.list(from)) { if (!is.null(from$radius)) { radius1 <- from$radius radius2 <- 0 } if (!is.null(from$center)) from <- from$center } if (is.list(to)) { if (!is.null(to$radius)) { radius2 <- to$radius to <- to$center } } theta <- atan((from[2] - to[2])/(from[1] - to[1])) costheta <- cos(theta) sintheta <- sin(theta) dist <- sqrt((to[1] - from[1])^2 + (to[2] - from[2])^2) if ((adj > 3) || (adj < 1)) { x <- (to[1] + from[1])/2 y <- (to[2] + from[2])/2 } else { x <- from[1] - sign(from[1] - to[1] + 0.001) * (4 - adj) * costheta * dist/4 y <- from[2] - sign(from[1] - to[1] + 0.001) * (4 - adj) * sintheta * dist/4 } if (is.null(labels)) { h.size <- 0 } else { h.size <- nchar(labels) * cex * gap.size } if (is.null(labels)) { v.size <- 0 } else { v.size <- cex * 0.7 * gap.size } if (from[1] < to[1]) { h.size <- -h.size radius1 <- -radius1 radius2 <- -radius2 } x0 <- from[1] - costheta * radius1 xr <- x + h.size * costheta * v.size y0 <- from[2] - sintheta * radius1 xl <- x - h.size * costheta * v.size yr <- y + v.size * sintheta * h.size yl <- y - v.size * sintheta * h.size xe <- to[1] + costheta * radius2 ye <- to[2] + sintheta * radius2 if (!is.null(labels)) { if (draw) { text(x, y, labels, cex = l.cex, pos = pos, ...) text.values <- NULL } else { if (is.null(pos)) pos <- 0 text.values <- c(x, y, labels, pos = pos, cex = l.cex) } } if (draw) { arrows(x0, y0, xr, yr, length = (both + 0) * 0.1 * scale, angle = 30, code = 1, col = col, lty = lty, ...) arrows(xl, yl, xe, ye, length = 0.1 * scale, angle = 30, code = 2, col = col, lty = lty, ...) arrow.values <- NULL } else { arrow.values <- list(x0, y0, xr, yr, length = (both + 0) * 0.1 * scale, angle = 30, code = 1, xl, yl, xe, ye, length2 = 0.1 * scale, angle2 = 30, code2 = 2, col = col, lty = lty, ...) } invisible(list(text.values = text.values, arrow.values = arrow.values)) } psych/R/multilevel.reliability.R0000644000176200001440000004431214444434354016433 0ustar liggesusers#Developed February, 2017 #closely follows chapters by Pat Shrout and Sean Lane in terms of the statistics "multilevel.reliability" <- "mlr" <- function(x,grp="id",Time="time",items=c(3:5),alpha=TRUE,icc=FALSE,aov=TRUE,lmer=FALSE, lme = TRUE,long=FALSE,values=NA,na.action="na.omit",plot=FALSE, main="Lattice Plot by subjects over time") { cl <- match.call() s.lmer <- lmer.MS <- MS_id <- s.aov <-NULL MS.df <- data.frame( matrix(NA,nrow=8,ncol=2)) #a filler in case we don't have it colnames(MS.df) <- c("Variance","Percent") if(!long) {#the normal case is wide data which we analyze and then convert to long #first check if we should reverse any items and convert location numbers (if specified) to location names n.items <- length(items) if(is.character(items)) { temp <- rep(1,n.items) temp [strtrim(items,1)=="-"] <- -1 if(any(temp < 0) ) {items <- sub("-","",items) } } else {temp <- sign(items) items <- colnames(x)[abs(items)] } if(any(temp < 0)) { min.item <- min(x[items],na.rm=TRUE) max.item <- max(x[items],na.rm=TRUE) x[items[temp <0]] <- max.item- x[items[temp <0]] + min.item } wide <- x[items] rwname <- unique(x[grp]) n.obs <- nrow(rwname) n.time <- length(table(x[Time])) n.items <- ncol(wide) if(alpha &(n.time > 2)) { alpha.by.person <- by(x,x[grp],function(x) alphaBy(x[items],x[grp])) rnames <- paste0("ID",names(alpha.by.person)) alpha.by.person <- matrix(unlist(alpha.by.person),ncol=5,byrow=TRUE) colnames(alpha.by.person) <- c("Raw alpha","Std. alpha","av.r","signal/noise","bad cases") rownames(alpha.by.person) <- rnames } else {alpha.by.person <- NA} if(icc) { #this takes a long time, and can be skipped icc.by.person <- by(x,x[grp],function(x) ICC(x[items])) icc.by.time <- by(x,x[Time],function(x) ICC(x[items])) icc.person.sum <- matrix(NA,nrow=n.obs,ncol=6) #add more columns when we think about what to put there icc.time.sum <- matrix(NA,nrow=n.time,ncol=6) for(person in 1:n.obs) {icc.person.sum[person,] <- c(icc.by.person[[person]][["results"]][["ICC"]][c(3,6)], icc.by.person[[person]][["results"]][["lower bound"]][3],icc.by.person[[person]][["results"]][["upper bound"]][3], icc.by.person[[person]][["results"]][["lower bound"]][6],icc.by.person[[person]][["results"]][["upper bound"]][6]) } rownames(icc.person.sum) <- paste0("ID",names(icc.by.person)) colnames(icc.person.sum) <- c("ICC single" ,"ICC summed","Lower Bound ICC13","Upper Bound ICC13", "Lower Bound ICC23","Upper Bound ICC23") for(time in 1:n.time) {icc.time.sum[time,] <- c(icc.by.time[[time]][["results"]][["ICC"]][c(3,6)], icc.by.time[[time]][["results"]][["lower bound"]][3],icc.by.time[[time]][["results"]][["upper bound"]][3], icc.by.time[[time]][["results"]][["lower bound"]][6],icc.by.time[[time]][["results"]][["upper bound"]][6]) } rownames(icc.time.sum) <- paste0("time",1:n.time) colnames(icc.time.sum) <- c("ICC single" ,"ICC summed","Lower Bound ICC13","Upper Bound ICC13", "Lower Bound ICC23","Upper Bound ICC23") } else {icc.by.person <- icc.by.time <- icc.time.sum <- icc.person.sum <- NA} #this next part treats the possibility of missing times long <- NULL long.list <- by(x,x[grp],function(xx) {xx y.df <- data.frame(id = as.factor(xx[,grp]), time=as.factor(xx[,Time]),stack(xx[items])) }) for (i in 1:n.obs) {long <- rbind(long,long.list[[i]]) } colnames(long)[4] <- "items" } else {#we have long data already, but need to add a few values to make it work long <- x n.items <- length(table(long[items])) #this does not n.obs <- length(table(long[grp])) n.time <- length(table(long[Time])) long <- long[c(grp,Time,items,values)] colnames(long) <- c("id","time","items","values") alpha.by.person <- icc.person.sum <- icc.time.sum <- icc.by.person <- icc.by.time <- NULL } if(lmer) { if (!requireNamespace('lme4')) {stop("I am sorry, to do a NREML requires the lme4 package to be installed")} mod.lmer <- lme4::lmer(values ~ 1 + (1 | id) + (1 | time) + (1 | items) + (1 | id:time)+ (1 | id:items)+ (1 | items :time), data=long,na.action=na.action) #might want to add control option, but probably not needed vc <- lme4::VarCorr(mod.lmer) MS_id <- vc$id[1,1] MS_time <- vc$time[1,1] MS_items <- vc$items[1,1] # MS_pxt <- vc[[1]][[1]] #replaced with actual names rather than locations # MS_pxitem <- vc[[2]][[1]] # MS_txitem <- vc[[3]][[1]] MS_pxt <- vc[["id:time"]][[1]] MS_pxitem <- vc[["id:items"]][[1]] MS_txitem <- vc[["items:time"]][[1]] error <- MS_resid <- (attributes(vc)$sc)^2 s.lmer <- s.aov <- summary(mod.lmer) MS.df <- data.frame(variance= c(MS_id, MS_time ,MS_items, MS_pxt, MS_pxitem, MS_txitem, MS_resid,NA)) rownames(MS.df) <- c("ID","Time","Items","ID x time", "ID x items", "time x items", "Residual","Total") MS.df["Total",] <- sum(MS.df[1:7,1],na.rm=TRUE) MS.df["Percent"] <- MS.df/MS.df["Total",1] lmer.MS <- MS.df #save these } if(aov) { aov.x <- aov(values ~ id + time + items + time * id + time * items + items * id , data = long) s.aov <- summary(aov.x) stats <- matrix(unlist(s.aov),ncol=5, byrow=FALSE) colnames(stats) <- c("df","SS", "MS", "F", "p") rownames(stats) <- c("id","time","items", "id x time","time x items","id x items","residuals") MS_id <- (stats["id","MS"] - stats["id x time","MS"] - stats["id x items","MS"] + stats["residuals","MS"]) /( n.time * n.items) MS_time <- (stats["time","MS"] - stats["id x time","MS"] - stats["time x items","MS"] + stats["residuals","MS"])/(n.obs*n.items) MS_items <- (stats["items","MS"] - stats["id x items","MS"] - stats["time x items","MS"] + stats["residuals","MS"])/(n.obs*n.time) MS_pxt <- (stats["id x time", "MS"] - stats["residuals","MS"])/( n.items) MS_pxitem <- (stats["id x items", "MS"] - stats["residuals","MS"])/( n.time) MS_txitem <- (stats["time x items", "MS"] - stats["residuals","MS"])/(n.obs) MS.df <- data.frame(variance= c(MS_id, MS_time ,MS_items, MS_pxt, MS_pxitem, MS_txitem, stats["residuals","MS"],NA)) error <- stats["residuals","MS"] if(any(MS.df[1:7,1] < 0) & is.null(lmer.MS)){ warning("Some of the variance estimates from ANOVA are negative. This is probably due to missing values and an unbalanced design. You should consider using the lme option") } else {if(!is.null(lmer.MS)) { MS_id <- lmer.MS[1,1] MS_time <- lmer.MS[2,1] MS_items <- lmer.MS[3,1] MS_pxt <- lmer.MS[4,1] MS_pxitem <- lmer.MS[5,1] MS_txitem <- lmer.MS[6,1] error <- MS_resid <- lmer.MS[7,1] MS.df[1] <- lmer.MS[1] }} rownames(MS.df) <- c("ID","Time","Items","ID x time", "ID x items", "time x items", "Residual","Total") MS.df["Total",] <- sum(MS.df[1:7,1],na.rm=TRUE) MS.df["Percent"] <- MS.df/MS.df["Total",1] } if(!is.null(MS_id)) { #now find the reliabilities -- note the typo in equation 7 in Lane and Shrout Rkf <- (MS_id + MS_pxitem/n.items)/((MS_id + MS_pxitem/n.items + error/(n.time * n.items))) R1r <- (MS_id + MS_pxitem/n.items)/((MS_id + MS_pxitem/n.items + MS_time + MS_pxt + error/( n.items))) #per Sean Lane Rkr <- (MS_id + MS_pxitem/n.items)/((MS_id + MS_pxitem/n.items + MS_time/n.time + MS_pxt/n.time + error/( n.time * n.items))) Rc <- (MS_pxt)/(MS_pxt + error/n.items) } else { Rkf <- R1r <- Rkr <-Rc <- NULL} if(lme | lmer) { #we find these using a nested structure from lme or lmer (if available) if(!lme & lmer) {#use lmer to do the nested test mod.lmer <- lme4::lmer(values ~ 1 + (1 | id/time),data=long,na.action=na.action) s.lme <- summary(mod.lmer) vc <-lme4::VarCorr(mod.lmer) vid <- vc$id[1,1] vtime_id <- vc$time[1,1] vres <- (attributes(vc)$sc)^2 } else {#use lme to do the nested test mod.lme <- nlme::lme(values ~ 1 , random = list(id =~ 1 ,time =~ 1 | id:items), data=long,na.action=na.action) s.lme <- summary(mod.lme) vc <- suppressWarnings(matrix(as.numeric(nlme::VarCorr(mod.lme)),ncol=2)) vid <- vc[2,1] vtime_id <- vc[4,1] vres <- vc[5,1]} Rkrn <- vid/(vid + vtime_id/(n.time) + vres/(n.time * n.items)) #this are the nested terms Rcn <- vtime_id/ (vtime_id + vres/n.items) MS.df ["id",1] <- vid MS.df ["id(time)",1] <- vtime_id MS.df["residual",1] <- vres MS.df["total",1] <- vid + vtime_id + vres MS.df ["id",2] <- vid/ MS.df["total",1] MS.df ["id(time)",2] <- vtime_id/MS.df["total",1] MS.df["residual",2] <- vres/MS.df["total",1] MS.df["total",2] <- MS.df["total",1]/MS.df["total",1] } else {MS.df ["id",1] <- MS.df ["id(time)",1] <- MS.df["residual",1] <- MS.df["total",1] <- MS.df ["id",2] <- MS.df ["id(time)",2] <- MS.df["residual",2] <- NA MS.df["total",2] <- MS.df["total",1]/MS.df["total",1] s.lme <- Rkrn <- Rcn <- NULL} # if(aov || lmer ||lme) {result <- list(n.obs = n.obs, n.time = n.time, n.items=n.items, components = MS.df,RkF =Rkf,R1R = R1r,RkR = Rkr,Rc=Rc,RkRn=Rkrn,Rcn = Rcn, ANOVA=s.aov,s.lmer =s.lmer,s.lme= s.lme,alpha=alpha.by.person, summary.by.person = icc.person.sum,summary.by.time=icc.time.sum, ICC.by.person = icc.by.person,ICC.by.time=icc.by.time,lmer=lmer,long = long,Call=cl) } else {result <- list(n.obs = n.obs, n.time = n.time, n.items=n.items,alpha=alpha.by.person,lmer=lmer,long=long,Call=cl) } if(plot) { plot1<- xyplot(values ~ time | id, group=items, data=long, type = "b",as.table=TRUE,strip=strip.custom(strip.names=TRUE,strip.levels=TRUE),col=c("blue","red","black","grey"),main=main) print(plot1)} class(result) <- c("psych","multilevel") return(result) } "print_psych.multilevel" <- function(x,digits=2,all=FALSE,short=TRUE) { cat("\nMultilevel Generalizability analysis ",x$title," \n") cat("Call: ") print(x$Call) cat("\nThe data had ",x$n.obs, " observations taken over ", x$n.time ," time intervals for ", x$n.items, "items.\n") mat <- list(n.obs = x$n.obs,n.time = x$n.time,n.items = x$n.items) #save these cat("\n Alternative estimates of reliability based upon Generalizability theory\n") if(!is.null(x$RkF)){ cat("\nRkF = ",round(x$RkF,digits) , "Reliability of average of all ratings across all items and times (Fixed time effects)") mat["RkF"] <- x$RkF} if(!is.null(x$R1R)) {cat("\nR1R = ",round(x$R1R,digits),"Generalizability of a single time point across all items (Random time effects)") mat["R1R"] <- x$R1R} if(!is.null(x$RkR)) {cat("\nRkR = ",round(x$RkR,digits),"Generalizability of average time points across all items (Random time effects)") mat["RkR"] <- x$RkR} if(!is.null(x$Rc)) {cat("\nRc = ",round(x$Rc,digits),"Generalizability of change (fixed time points, fixed items) ") mat["Rc"] <- x$Rc} if(!is.null(x$RkRn) ) {cat("\nRkRn = ",round(x$RkRn,digits),"Generalizability of between person differences averaged over time (time nested within people)") mat["RkRn"] <- x$RkRn} if(!is.null(x$Rcn)) {cat("\nRcn = ",round(x$Rcn,digits),"Generalizability of within person variations averaged over items (time nested within people)") mat["Rcn"] <- x$Rcn} if(!x$lmer && !is.null(x$RkF) ) {cat("\n\n These reliabilities are derived from the components of variance estimated by ANOVA \n") if(!is.null(x$components)) { if(!any(is.na(x$components[1:8,1])) & any(x$components[1:8,1] < 0)) { warning("The ANOVA based estimates are suspect, probably due to missing data, try using lmer")} }} else { if(x$lmer ) { cat("\n\n These reliabilities are derived from the components of variance estimated by lmer \n")}} if(!is.null(x$components) && !is.na(x$components[1,1])) { print(round(x$components[1:8,],digits=digits)) mat["components"] <- list( x$components[1:8,1])} if(!is.null(x$components) && !is.na(x$components[9,1] )) { if(!x$lmer) {cat("\n The nested components of variance estimated from lme are:\n")} else {cat("\n The nested components of variance estimated from lmer are:\n")} print(x$components[9:12,],digits=digits) mat["lmer"] = list(x$components[9:12,1])} else {cat("\nNested components were not found because lme was not used\n")} if(!short) {cat("\n\n Three way ANOVA or lmer analysis \n") print(x$ANOVA,digits=digits) cat("\nvariance components from lme(r)\n") print(x$s.lme,digits=digits) cat("\n Alpha reliability by subjects)\n") print(x$alpha,digits) } if(all) { cat("\n Intraclass Correlations by subjects (over time and items) \n") print(x$summary.by.person,digits) cat("\n Intraclass Correlations by time (over subjects and items) \n") print(x$summary.by.time,digits) } if(short) { cat("\nTo see the ANOVA and alpha by subject, use the short = FALSE option.")} if(!all) {cat("\n To see the summaries of the ICCs by subject and time, use all=TRUE")} cat("\n To see specific objects select from the following list:\n",names(x)[-c(1:10)]) invisible(mat) } "alphaBy" <- function(x,subject) { n <- dim(x)[2] C <- cov(x,use="pairwise") R <- cov2cor(C) alpha.raw <- (1- tr(C)/sum(C))*(n/(n-1)) sumR <- sum(R,na.rm=TRUE) alpha.std <- (1- n/sumR)*(n/(n-1)) av.r <- (sumR-n)/(n*(n-1)) sn <- n*av.r/(1-av.r) if(is.na(sum(R))) {message("At least one item had no variance when finding alpha for subject = ",subject[1,],". Proceed with caution") bad <- 1} else {bad <- 0} result <- list(raw=alpha.raw,std=alpha.std,av.r=av.r,sn =sn,bad=bad) return(result) } "mlArrange" <- function(x,grp="id",Time="time",items=c(3:5),extra=NULL) { n.items <- length(items) if(is.character(items)) { temp <- rep(1,n.items) temp [strtrim(items,1)=="-"] <- -1 if(any(temp < 0) ) {items <- sub("-","",items) } } else {temp <- sign(items) items <- colnames(x)[abs(items)] } if(any(temp < 0)) { min.item <- min(x[items],na.rm=TRUE) max.item <- max(x[items],na.rm=TRUE) x[items[temp <0]] <- max.item- x[items[temp <0]] + min.item } wide <- x[items] rwname <- unique(x[grp]) n.obs <- nrow(rwname) n.time <- nrow(unique(x[Time])) n.items <- ncol(wide) long <- NULL if(is.null(extra)) { long.list <- by(x,x[grp],function(xx) {xx y.df <- data.frame(id = (xx[,grp]), time=(xx[,Time]),stack(xx[items])) }) for (i in 1:n.obs) {long <- rbind(long,long.list[[i]]) } colnames(long)[4] <- "items" } else { long.list <- by(x,x[grp],function(xx) {xx y.df <- cbind((xx[,grp]), (xx[,Time]),stack(xx[items]), (xx[,extra]),row.names=NULL) }) for (i in 1:n.obs) {long <- rbind(long,long.list[[i]]) } colnames(long)[1:4] <- c("id","time","values", "items") colnames(long)[5:(4+length(extra))] <- colnames(x)[extra] } results <- long return(long) } "mlPlot" <- function(x,grp="id",Time="time",items=c(3:5),extra=NULL,col=c("blue","red","black","grey"),type="b",main="Lattice Plot by subjects over time",...) { long <- mlArrange(x =x,grp=grp,Time=Time,items=items,extra=extra) plot1<- xyplot(values ~ time | id, group=items, data=long, type = type,as.table=TRUE,strip=strip.custom(strip.names=TRUE,strip.levels=TRUE),col=col,main=main,...) print(plot1) invisible(long) } "print_psych.multilevel.mat" <- function(x,digits=2,all=FALSE,short=TRUE) { cat("\nMultilevel Generalizability analysis ",x$title," \n") if(length(x) < 21 ) items <- length(x) temp <- matrix(unlist(x),ncol=items,byrow=FALSE) rownames(temp) <- c("n.obs","n.time","n,items","RkF","R1R","RkR","Rc","RkRn","Rcn","ID","Time","Items","ID x Time","ID x items","Time x items","residual","Total","Id","ID (time)","residual","total") cat("\nThe data had ",x$n.obs, " observations taken over ", x$n.time ," time intervals for ", x$n.items, "items.\n") mat <- list(n.obs = x$n.obs,n.time = x$n.time,n.items = x$n.items) #save these cat("\n Alternative estimates of reliabilty based upon Generalizability theory\n") if(!is.null(x$RkF)){ cat("\nRkF = ",round(x$RkF,digits) , "Reliability of average of all ratings across all items and times (Fixed time effects)") mat["RkF"] <- x$RkF} if(!is.null(x$R1R)) {cat("\nR1R = ",round(x$R1R,digits),"Generalizability of a single time point across all items (Random time effects)") mat["R1R"] <- x$R1R} if(!is.null(x$RkR)) {cat("\nRkR = ",round(x$RkR,digits),"Generalizability of average time points across all items (Random time effects)") mat["RkR"] <- x$RkR} if(!is.null(x$Rc)) {cat("\nRc = ",round(x$Rc,digits),"Generalizability of change (fixed time points, fixed items) ") mat["Rc"] <- x$Rc} if(!is.null(x$RkRn) ) {cat("\nRkRn = ",round(x$RkRn,digits),"Generalizability of between person differences averaged over time (time nested within people)") mat["RkRn"] <- x$RkRn} if(!is.null(x$Rcn)) {cat("\nRcn = ",round(x$Rcn,digits),"Generalizability of within person variations averaged over items (time nested within people)") mat["Rcn"] <- x$Rcn} if(!x$lmer && !is.null(x$RkF) ) {cat("\n\n These reliabilities are derived from the components of variance estimated by ANOVA \n") if(!is.null(x$components)) { if(!any(is.na(x$components[1:8,1])) & any(x$components[1:8,1] < 0)) { warning("The ANOVA based estimates are suspect, probably due to missing data, try using lmer")} }} else { if(x$lmer ) { cat("\n\n These reliabilities are derived from the components of variance estimated by lmer \n")}} if(!is.null(x$components) && !is.na(x$components[1,1])) { print(round(x$components[1:8,],digits=digits)) mat["components"] <- list( x$components[1:8,])} if(!is.null(x$components) && !is.na(x$components[9,1] )) { if(!x$lmer) {cat("\n The nested components of variance estimated from lme are:\n")} else {cat("\n The nested components of variance estimated from lmer are:\n")} print(x$components[9:12,],digits=digits) mat["lmer"] = list(x$components[9:12,])} else {cat("\nNested components were not found because lme was not used\n")} if(!short) {cat("\n\n Three way ANOVA or lmer analysis \n") print(x$ANOVA,digits=digits) cat("\nvariance components from lme(r)\n") print(x$s.lme,digits=digits) cat("\n Alpha reliability by subjects)\n") print(x$alpha,digits) } if(all) { cat("\n Intraclass Correlations by subjects (over time and items) \n") print(x$summary.by.person,digits) cat("\n Intraclass Correlations by time (over subjects and items) \n") print(x$summary.by.time,digits) } if(short) { cat("\nTo see the ANOVA and alpha by subject, use the short = FALSE option.")} if(!all) {cat("\n To see the summaries of the ICCs by subject and time, use all=TRUE")} cat("\n To see specific objects select from the following list:\n",names(x)[-c(1:10)]) invisible(mat) } psych/R/VSS.scree.R0000644000176200001440000000332014346224225013501 0ustar liggesusers"VSS.scree" <- function(rx,main="scree plot",sqrt=FALSE) { nvar <- dim(rx)[2] if (nvar != dim(rx)[1]) {rx <- cor(rx,use="pairwise")} values <- eigen(rx)$values if(sqrt) values[values>0] <- sqrt(values[values>0]) plot(values,type="b", main = main,ylab="Eigen values of components",xlab=" component number") abline(h=1) } #added the sqrt option 12/13/22 "scree" <- function(rx,factors=TRUE,pc=TRUE,main="Scree plot",hline=NULL,add=FALSE,sqrt=FALSE) { cl <- match.call() nvar <- dim(rx)[2] if (nvar != dim(rx)[1]) {rx <- cor(rx,use="pairwise")} if(pc) {values <- eigen(rx)$values if(factors) {ylab="Eigen values of factors and components" xlab="factor or component number"} else {ylab="Eigen values of components" xlab=" component number"} } else {values <- fa(rx)$values ylab="Eigen values of factors" xlab=" factor number" factors <- FALSE } if(sqrt) values[values>0] <- sqrt(values[values>0]) max.value <- max(values) if(!add) {plot(values,type="b", main = main ,pch=16,ylim=c(0,max.value),ylab=ylab,xlab=xlab)} else { points(values,type="b", ,pch=16) } if(factors) { fv <- fa(rx)$values if(sqrt) fv[fv>0] <- sqrt(fv[fv>0]) points(fv,type="b",pch=21,lty="dotted") } else {fv <- NULL} if(is.null(hline)) {abline(h=1)} else {abline(h=hline) } if(factors & pc) { legend("topright", c("PC ","FA"),pch=c(16,21), text.col = "green4", lty = c("solid","dotted"), merge = TRUE, bg = 'gray90')} if(pc) { results <- list(fv = fv, pcv=values,call=cl)} else { results <- list(fv=values, pcv=NULL,call=cl) } class(results) <- c("psych","scree") invisible(results) } psych/R/alpha.scale.R0000644000176200001440000000051011162161010014060 0ustar liggesusers"alpha.scale" <- function (x,y) #find coefficient alpha given a scale and a data.frame of the items in the scale { n=length(y) #number of variables Vi=sum(diag(var(y,na.rm=TRUE))) #sum of item variance Vt=var(x,na.rm=TRUE) #total test variance ((Vt-Vi)/Vt)*(n/(n-1))} #alpha psych/R/fa.extension.R0000644000176200001440000001327114533522674014344 0ustar liggesusers #modified 7/25/21 to report factor scores so that we can use biplot on the exensions. #modified again 12/03/23 to report scores as scores rather than with all the extra baggage "fa.extension" <- function(Roe,fo,correct=TRUE) { cl <- match.call() omega <-FALSE if(!is.null(class(fo)[2])) {if(inherits(fo,"fa")) { if(!is.null(fo$Phi)) {Phi <- fo$Phi} else {Phi <- NULL} fl <- fo$loadings fs <- fo$Structure } else {if (inherits(fo,"omega")) { #switched to inherits December 20, 2019 omega <- TRUE w <- fo$stats$weights fl <- fo$schmid$sl Phi <- NULL fl <- fl[,1:(dim(fl)[2]-3)] nfactors <- dim(fl)[2] fe <- t(t(w) %*% Roe) foblique <- fo$schmid$oblique feoblique <- t( Roe) %*% foblique %*% (solve(t(foblique)%*% (foblique))) feoblique <- feoblique %*% solve(fo$schmid$phi) } } } #Roe is Horn's Re R1 is Phi Pc is pattern of original = fl # Pe = Re Pc solve (Pc'Pc) solve Phi if(!omega) fe <- t( Roe) %*% fl %*% (solve(t(fl)%*% (fl))) #should we include Phi? if(!is.null(Phi)) fe <- fe %*% solve(Phi) #horn equation 26 if(!correct) {#the Gorsuch case -- not actually-- read Gorsuch again # d <- diag(t(fl) %*% fo$weight) #this is probably wrong d <- sqrt(diag(t(fl) %*% fo$weight)) #a correction of sorts for reliability fe <- (fe * d) } colnames(fe) <- colnames(fl) rownames(fe) <- colnames(Roe) if(!is.null(Phi)) {resid <- Roe - fl %*% Phi %*% t(fe)} else {resid <- Roe - fl %*% t(fe)} #fixed to actually give residual (1/30/18) result <- list(loadings = fe,Phi=Phi,resid=resid,Call=cl) if(!omega) {result <- list(loadings = fe,Phi=Phi,resid=resid,Call=cl)} else {result <- list(loadings = fe,oblique= feoblique,Phi=Phi,resid=resid,Call=cl)} class(result) <- c("psych","extension") return(result) } #written April 5, 2011 #revised August 15, 2011 to avoid using the weights matrix except in the omega case #created December 8, 2012 to allow for extension and goodness of fits of total model #modified 31/5/14 to allow for omega extension as well #modified 04-09/16 to pass the Structure matrix as well #Added the cors and correct parameters to pass to fa 1/3/21 "fa.extend" <- function(r,nfactors=1,ov=NULL,ev=NULL,n.obs = NA, np.obs=NULL,correct=TRUE,rotate="oblimin",SMC=TRUE,warnings=TRUE, fm="minres",alpha=.1, omega=FALSE,cor="cor",use="pairwise",cor.correct=.5,weight=NULL,missing=FALSE,smooth=TRUE, ...) { cl <- match.call() if(is.numeric(ev)) ev <- colnames(r)[ev] #in case we are selecting variables if(is.numeric(ov)) ov <- colnames(r)[ov] nv <- c(ov,ev) #if(nrow(r) > ncol(r)){ #the case of a data matrix if(!isCorrelation(r)){ #first find the correlations n.obs <- nrow(r) np.obs.r <- pairwiseCount(r)[nv,nv] np.obs <- np.obs.r[ov,ov] data <- r #if we want to find factor scores # r <- cor(r,use='pairwise') switch(cor, cor = {r <- cor(r,use=use) }, #does not include the weight option from fa cov = {r <- cov(r,use=use) covar <- TRUE}, wtd = { r <- cor.wt(r,w=weight)$r}, spearman = {r <- cor(r,use=use,method="spearman")}, kendall = {r <- cor(r,use=use,method="kendall")}, tet = {r <- tetrachoric(r,correct=cor.correct,weight=weight)$rho}, poly = {r <- polychoric(r,correct=cor.correct,weight=weight)$rho}, tetrachoric = {r <- tetrachoric(r,correct=cor.correct,weight=weight)$rho}, polychoric = {r <- polychoric(r,correct=cor.correct,weight=weight)$rho}, mixed = {r <- mixedCor(r,use=use,correct=cor.correct)$rho} ) if(omega) {fo <- omega(r[ov,ov],nfactors=nfactors,rotate=rotate,SMC=SMC,warnings=warnings,fm=fm,alpha=alpha,...)} else { fo <- fa(r[ov,ov],nfactors=nfactors,rotate=rotate,SMC=SMC,warnings=warnings,fm=fm,cor=cor,alpha=alpha, missing=missing,impute="mean", smooth=smooth,...)} } else { #the case of a correlation matrix data <- NULL R <- r[ov,ov] np.obs.r <- np.obs if(omega) {fo <- omega(R,nfactors=nfactors,n.obs=n.obs,rotate=rotate,SMC=SMC,warnings=warnings,fm=fm,cor=cor,alpha=alpha,np.obs=np.obs[ov,ov],...)} else { fo <- fa(R,nfactors=nfactors,n.obs=n.obs,rotate=rotate,SMC=SMC,warnings=warnings,fm=fm,cor=cor, correct=correct,alpha=alpha,np.obs=np.obs[ov,ov],missing=missing,impute="mean",smooth=smooth,...)} } Roe <- r[ov,ev,drop=FALSE] fe <- fa.extension(Roe,fo,correct=correct) if(omega) fo$loadings <- fo$schmid$sl[,1:(ncol(fo$schmid$sl)-3)] foe <- rbind(fo$loadings,fe$loadings) if(omega) oblique <- rbind(fo$schmid$oblique,fe$oblique) if(is.na(n.obs) && !is.null(np.obs)) n.obs <- max(as.vector(np.obs)) result <- factor.stats(r[nv,nv],foe,fo$Phi,n.obs,np.obs.r,alpha=alpha,smooth=smooth,coarse=FALSE) #this is the second call to factor.stats if(omega) result$schmid$sl <- foe result$rotation <- rotate result$loadings <- foe if(nfactors > 1) {if(is.null(fo$Phi)) {h2 <- rowSums(foe^2)} else {h2 <- diag(foe %*% fo$Phi %*% t(foe)) }} else {h2 <-foe^2} result$communality <- h2 result$fm <- fm #remember what kind of analysis we did result$fo=fo if(!is.null(data)) result$scores <- factor.scores(data[,ov],fo,missing=missing,impute="mean")$scores if(omega) {result$schmid$sl <- foe result$schmid$gloading <- fo$schmid$gloading result$schmid$oblique <- oblique } if(is.null(fo$Phi)) {result$Structure <- foe } else { result$Structure <- foe %*% fo$Phi} result$fe=fe result$resid=fe$resid result$Phi=fo$Phi result$fn="fa" result$Call=cl class(result) <- c("psych","extend") return(result) } psych/R/lmCor.R0000644000176200001440000006752414547536735013041 0ustar liggesusers"set.cor" <- function(y,x,data,z=NULL,n.obs=NULL,use="pairwise",std=TRUE,square=FALSE,main="Regression Models",plot=TRUE,show=FALSE,zero=TRUE,part=FALSE) { setCor(y=y,x=x,data=data,z=z,n.obs=n.obs,use=use,std=std,square=square,main=main,plot=plot,show=show,zero=zero,part=part)} #lmCor become the common name for what was setCor "lmCor" <- "setCor" <- function(y,x,data,z=NULL,n.obs=NULL,use="pairwise",std=TRUE,square=FALSE,main="Regression Models",plot=TRUE,show=FALSE,zero=TRUE,alpha=.05,part=FALSE) { #a function to extract subsets of variables (a and b) from a correlation matrix m or data set m #and find the multiple correlation beta weights + R2 of the a set predicting the b set #seriously rewritten, March 24, 2009 to make much simpler #minor additons, October, 20, 2009 to allow for print and summary function #major addition in April, 2011 to allow for set correlation #added calculation of residual matrix December 30, 2011 #added option to allow square data matrices #modified December, 2014 to allow for covariances as well as to fix a bug with single x variable #modified April, 2015 to handle data with non-numeric values in the data, which are not part of the analysis #Modified November, 2017 to handle "lm" style input using my parse function. #modified July 4, 2018 to add intercepts and confidence intervals (requested by Franz Strich) #Modified September 25, 2019 to add the confidence intervals of the intercepts #This was a substantial rewrite of the code to include the moments matrix #modified June 5, 2023 to allow the complete cases option to work. cl <- match.call() #convert names to locations prod <- ex <- NULL #in case we do not have formula input #first, see if they are in formula mode if(inherits(y,"formula")) { ps <- fparse(y) y <- ps$y x <- ps$x med <- ps$m #but, mediation is not done here, so we just add this to x # if(!is.null(med)) x <- c(x,med) #not necessary, because we automatically put this in prod <- ps$prod z <- ps$z #do we have any variable to partial out ex <- ps$ex } # data <- char2numeric(data) #move to later (01/05/19) if(is.numeric(y )) y <- colnames(data)[y] if(is.numeric(x )) x <- colnames(data)[x] if(is.numeric(z )) z <- colnames(data)[z] #check for bad input if(any( !(c(y,x,z,ex) %in% colnames(data)) )) { cat("\nOops! Variable names are incorrect. Offending items are ", c(y, x, z, ex)[which(!(c(y, x, z, ex) %in% colnames(data)))],"\n") stop("I am stopping because the variable names are incorrect. See above.")} if(!isCorrelation(data)) { # We have a data matrix rather than a correlation matrix if(use=="complete") {cc <- complete.cases(data[,c(y,x,z,ex)]) data <- data[cc,c(y,x,z,ex)] } else { data <- data[,c(y,x,z,ex)] } data <- char2numeric(data) if(!is.matrix(data)) data <- as.matrix(data) data <- cbind(Intercept=1,data) colnames(data)[1] <- "(Intercept)" x <- c("(Intercept)",x) #this adds a ones column to the data to find intercept terms if(!is.numeric(data)) stop("The data must be numeric to proceed") if(!is.null(prod) | (!is.null(ex))) {#we want to find a product term if(zero) data[,x] <- scale(data[,x],scale=FALSE) if(!is.null(prod)) { prods <- matrix(NA,ncol=length(prod),nrow=nrow(data)) colnames(prods) <- prod colnames(prods) <- paste0("V",1:length(prod)) for(i in 1:length(prod)) { prods[,i] <- apply(data[,prod[[i]]],1,prod) colnames(prods)[i] <- paste0(prod[[i]],collapse="*") } data <- cbind(data,prods) x <- c(x,colnames(prods)) } if(!is.null(ex)) { quads <- matrix(NA,ncol=length(ex),nrow=nrow(data)) #find the quadratric terms colnames(quads) <- paste0(ex) for(i in 1:length(ex)) { quads[,i] <- data[,ex[i]] * data[,ex[i]] colnames(quads)[i] <- paste0(ex[i],"^2") } data <- cbind(data,quads) x <- c(x,colnames(quads)) } } if(use=="complete") {n.obs <- min(pairwiseCount(data), na.rm=TRUE) } else {n.obs <- max(pairwiseCount(data),na.rm=TRUE)} #this overestimates the number of observations for missing data df <- n.obs - length(x) -length(z) #we have the intercept as one degree of freedom, partialed variables count as well means <- colMeans(data,na.rm=TRUE) #use these later to find the intercept sds <- apply(data,2,sd, na.rm=TRUE) C <- cov(data,use=use) if(std) { C["(Intercept)","(Intercept)"] <- 1 m <- cov2cor(C) C <- m } else { # do it on the moments matrix to get intercepts #if(zero) C["(Intercept)","(Intercept)"] <- 1 if(zero) means[1] <- 1 m <- C if(length(z) < 1) { m <- C * (n.obs-1) + means %*% t(means) * n.obs} else { #this creates the moments matrix unless we are partialling #multiply C * (n.obs-1) to convert the covariances to momements means []<- 0 m <- C * (n.obs-1) # + means %*% t(means) * n.obs # should this be nobs m[1,1] <- n.obs } } raw <- TRUE # n.obs=dim(data)[1] #this does not take into account missing data # n.obs <- max(pairwiseCount(data),na.rm=TRUE ) #this does } else { if(!is.null(prod)) {warning("I am sorry, but product terms from a correlation matrix don't make sense and were ignored.")} raw <- FALSE if(!is.matrix(data)) data <- as.matrix(data) C <- data if(!is.null(n.obs)){ df <- n.obs - length(x) - length(z) } else {df <- NULL} if(std) {m <- cov2cor(C)} else {m <- C}} #We do all the calculations on the Covariance or correlation matrix (m) #convert names to locations if(use=="complete") {nm <- n.obs} else {nm <- dim(data)[1]} xy <- c(x,y) numx <- length(x) numy <- length(y) numz <- 0 nxy <- numx+numy m.matrix <- m[c(x,y),c(x,y)] x.matrix <- m[x,x,drop=FALSE] xc.matrix <- m[x,x,drop=FALSE] #fixed19/03/15 xy.matrix <- m[x,y,drop=FALSE] xyc.matrix <- m[x,y,drop=FALSE] #fixed 19/03/15 y.matrix <- m[y,y,drop=FALSE] if(!is.null(z)){numz <- length(z) #partial out the z variables zm <- m[z,z,drop=FALSE] za <- m[x,z,drop=FALSE] zb <- m[y,z,drop=FALSE] zmi <- solve(zm) x.matrix <- x.matrix - za %*% zmi %*% t(za) if(!part) y.matrix <- y.matrix - zb %*% zmi %*% t(zb) #part correlations do not partial y xy.matrix <- xy.matrix - za %*% zmi %*% t(zb) m.matrix <- cbind(rbind(y.matrix,xy.matrix),rbind(t(xy.matrix),x.matrix)) #m.matrix is now the matrix of partialled covariances -- make sure we use this one! m <- m.matrix } ########################################################## ### The actual regression calculations start here ### ### some modifications made 12/1/19 to adjust the dfs when returning SSRs if(numx == 1 ) {beta <- matrix(xy.matrix,nrow=1)/x.matrix[1,1] rownames(beta) <- rownames(xy.matrix) colnames(beta) <- colnames(xy.matrix) } else #this is the case of a single x { beta <- solve(x.matrix,xy.matrix) #solve the equation bY~aX beta <- as.matrix(beta) } if(raw) {if(numx ==1) { intercept <- beta[1,y] #intercept <- means[y] - sum(means[x] * beta[x,y ]) } else {if(numy > 1) { intercept <- means[y] - colSums(means[x] * beta[x,y ]) #fix this to treat multple y } else {intercept <- means[y] - sum(means[x] * beta[x,y ])}} } else {intercept <- NA Residual.se <- NA } x.inv <- solve(x.matrix) yhat <- t(xy.matrix) %*% x.inv %*% (xy.matrix) resid <- y.matrix - yhat #this is in moments units se.beta <- list() if(raw) { #df.fudge <- (n.obs-1) #maybe n.obs-1 if(std) { R2 <- colSums(beta * xy.matrix)/diag(y.matrix) Vaxy <- beta * xy.matrix/diag(y.matrix) VIF <- 1/(1-smc((x.matrix ))) SST <- 1 MSE <- diag(resid/df) #this is used to find the SSR was n.obs -1 Residual.se <- sqrt(diag(resid)*(n.obs-1)/(df)) #to put in the SSR units, we need to use the number of observations -1 df.fudge? for (i in 1:length(y)) { se.beta[[i]] <- sqrt(MSE[i] * diag(x.inv)) } #se.beta <- sqrt(as.vector(MSE) * diag(x.inv)) } else { Residual.se <- sqrt(diag(resid) /df) #this is the df n.obs - length(x)) MSE <- diag(resid )/(df) if(length(y) > 1) {SST <- diag( m [y,y] - means[y]^2 * n.obs)} else {SST <- ( m [y,y] - means[y]^2 * n.obs)} R2 <- (SST - diag(resid)) /SST #we need to find the zero order correlations instead of the moments to get Vaxy C[1,1] <- 1 Rxy <- cov2cor(C) beta.std <- solve(Rxy[x,x],Rxy[x,y]) Vaxy <- beta.std * Rxy[x,y] VIF <- 1/(1-smc(Rxy[x,x])) for (i in 1:length(y)) { se.beta[[i]] <- sqrt(MSE[i] * diag(x.inv)) } } } else { #from correlations R2 <- colSums(beta * xy.matrix)/diag(y.matrix) Vaxy <- beta * xy.matrix/diag(y.matrix) VIF <- 1/(1-smc((x.matrix ))) SST <- 1 MSE <- diag( resid/(df-1 )) #because df is inflated in normal regression because of the intercept term should this be df or df -1 df.fudge <- n.obs-1 # Residual.se <- sqrt(diag(resid)/ df) #df.fudge? # Residual.se <- sqrt(diag(resid)*(n.obs-1)/(df+1)) #since we are not estimating the intercept # Residual.se <- sqrt(diag(resid)) #*df/(n.obs-1)) #don't use df Residual.se <- sqrt(diag(resid) * (n.obs-1)/(df-1)) for (i in 1:length(y)) { se.beta[[i]] <- sqrt(MSE[i] * diag(x.inv)) } } if (numy > 1 ) { if(is.null(rownames(beta))) {rownames(beta) <- x} if(is.null(colnames(beta))) {colnames(beta) <- y} # R2 <- colSums(beta * xy.matrix)/diag(y.matrix) # if(raw) { SST <- diag( m [y,y] - means[y]^2 * n.obs) #this seems redundant # # R2 <- (SST - resid) /SST # R2 <- (SST - diag(resid)) /SST # } else {R2 <- colSums(beta * xy.matrix)/diag(y.matrix)} # # } else { # colnames(beta) <- y # # # R2 <- sum(beta * xy.matrix)/y.matrix # R2 <- matrix(R2) rownames(beta) <- x # rownames(R2) <- colnames(R2) <- y } # # VIF <- 1/(1-smc((x.matrix ))) #now find the unit weighted correlations #reverse items in X and Y so that they are all positive signed #But this doesn't help in predicting y #we need to weight by the sign of the xy,matrix #this gives a different key for each y #need to adjust this for y # px <- principal(x.matrix) # keys.x <- diag(as.vector(1- 2* (px$loadings < 0 )) ) # py <- principal(y.matrix) # keys.y <- diag(as.vector(1- 2* (py$loadings < 0 ) )) # # Vx <- sum( t(keys.x) %*% x.matrix %*% t(keys.x)) # Vy <- sum( keys.y %*% y.matrix %*% t(keys.y)) # # ruw <- colSums(abs(xy.matrix))/sqrt(Vx) # Ruw <- sum(diag(keys.x) %*% xy.matrix %*% t(keys.y))/sqrt(Vx * Vy) #end of old way of doing it #new way (2/17/18) if(!std) C[1,1]<- 1 Cu <- cov2cor(C) keys.x <- sign(xy.matrix) #this converts zero order correlations into -1, 0, 1 weights for each y Vx <- t(keys.x) %*% Cu[x,x,drop=FALSE] %*% (keys.x) #diag are scale variances #Vy <- t(keys.x) %*% Cu[y,x,drop=FALSE] %*% keys.x #diag are y variances ? Vy <- Cu[y,y,drop=FALSE ] #(y.matrix) uCxy <- t(keys.x) %*% Cu[x,y,drop=FALSE] #xy.matrix ruw <- diag(uCxy)/sqrt(diag(Vx)) #these are the individual multiple Rs Ruw <- sum(uCxy)/sqrt(sum(Vx)*sum(Vy)) #what are these? #Now do the Set correlation from Cohen Lmat <- Mmat <- Dmat <- NULL #give them NULL values in case we don't need to find them if("(Intercept)" %in% colnames(x.matrix)) {X.matrix <- x.matrix[-1,-1,drop=FALSE] #get rid of the intercepts for the next operations XY.matrix <- xy.matrix[-1,,drop=FALSE] } else {X.matrix <- x.matrix XY.matrix <- xy.matrix } if(numy < 2) {Rset <- 1 - det(m.matrix)/(det(X.matrix) ) Myx <- solve(X.matrix) %*% XY.matrix %*% t(XY.matrix) cc2 <- cc <- T <- NULL} else {if (numx < 2) {Rset <- 1 - det(m.matrix)/(det(y.matrix) ) Myx <- XY.matrix %*% solve(y.matrix) %*% t(XY.matrix) cc2 <- cc <- T <- NULL} else {Rset <- 1 - det(m.matrix)/(det(X.matrix) * det(y.matrix)) if(numy > numx) { Myx <- solve(X.matrix) %*% XY.matrix %*% solve(y.matrix) %*% t(XY.matrix) #productes of two beta weights this is A from Bernstein Beta.y.x <-solve(X.matrix) %*% (XY.matrix) Beta.x.y <- solve(y.matrix) %*% t(XY.matrix ) } else { Myx <- solve(y.matrix) %*% t(XY.matrix )%*% solve(X.matrix) %*% (XY.matrix) Beta.y.x <-solve(X.matrix) %*% (XY.matrix) Beta.x.y <- solve(y.matrix) %*% t(XY.matrix ) } } #Myx is the canoncial correlation matrix of x ans y (see eq 12.1 in T & F) #and is the product of Beta.x.y with Beta.y.x #the following is taken from the geigen function in the fda package #see the geigen function in the fda package #now, we get rid of the xfac <- chol(X.matrix) yfac <- chol(y.matrix) xfacinv <- solve(xfac) yfacinv <- solve(yfac) Dmat <- t(xfacinv) %*% XY.matrix %*% yfacinv #almost the same as Mxy? No p <- dim(x.matrix)[1] q <- dim(y.matrix)[1] if (p >= q) { #I am not sure we need to make this choice result <- svd(Dmat) values <- result$d Lmat <- X.matrix %*% xfacinv %*% result$u Mmat <- y.matrix %*% yfacinv %*% result$v } else { result <- svd(t(Dmat)) values <- result$d Lmat <- X.matrix %*% xfacinv %*% result$v #these are the values returned from Geigen Mmat <- y.matrix%*% yfacinv %*% result$u} colnames(Lmat)<- paste0("Cx",1:NCOL(Lmat)) colnames(Mmat) <- paste0("Cy",1:NCOL(Mmat)) gei <- svd(Myx) cc2 <-gei$d #this actually matches result cc <- sqrt(cc2) # cc2 <- eigen(Myx)$values # cc <- sqrt(cc2) T <- sum(cc2)/length(cc2) } k <- NA #browser() #it would be nice to find the canonical loadings, but this is not done yet. #Although, I think that the Lmat and Mmat are the loadings but only for the two x two case of T and F if(!is.null(n.obs)) {k<- length(x)-raw #because we include the intercept in x ## Find the confidence intervals of the regressions ci.lower <- list() ci.upper <- list() df <- n.obs-k-1 -length(z) #this is the n.obs - length(x) and number of covariates # if(raw) { # uniq <- (1-smc(x.matrix) # ) #this returns values based upon correlations # #these are the standardized beta standard errors se <- matrix(unlist(se.beta),ncol=length(y)) if(!is.null(z)) {colnames(beta) <- paste0(colnames(beta),"*") } colnames(se) <- colnames(beta) if(!is.null(z)) {rownames(beta) <- paste0(rownames(beta),"*")} rownames(se) <- rownames(beta) # se <- t(t(se) * sqrt(diag(C)[y]))/sqrt(diag(xc.matrix)) #need to use m.matrix if(!raw) se <- t(t(se) * sqrt(diag(m.matrix)[y]))/sqrt(diag(x.matrix)) #corrected 11/29/18 for(i in 1:length(y)) {ci.lower[[i]] <- beta[,i] - qt(1-alpha/2,df)*se[,i] ci.upper[[i]] <- beta[,i] + qt(1-alpha/2,df)*se[,i]} ci.lower <- matrix(unlist(ci.lower),ncol=length(y)) ci.upper <- matrix(unlist(ci.upper),ncol=length(y)) colnames( ci.lower) <- colnames( ci.upper) <- colnames( beta) rownames( ci.lower)<- rownames( ci.upper) <- rownames(beta) confid.beta <- cbind(ci.lower,ci.upper) tvalue <- beta/se # prob <- 2*(1- pt(abs(tvalue),df)) prob <- -2 * expm1(pt(abs(tvalue),df,log.p=TRUE)) SE2 <- 4*R2*(1-R2)^2*(df^2)/((n.obs^2-1)*(n.obs+3)) SE =sqrt(SE2) F <- R2*df/(k*(1-R2)) # pF <- 1 - pf(F,k,df) pF <- -expm1(pf(F,k,df,log.p=TRUE)) shrunkenR2 <- 1-(1-R2)*(n.obs-1)/df #find the shrunken R2 for set cor (taken from CCAW p 615) u <- numx * numy m1 <- n.obs - max(numy ,(numx+numz)) - (numx + numy +3)/2 s <- sqrt((numx ^2 * numy^2 -4)/(numx^2 + numy^2-5)) if(numx*numy ==4) s <- 1 v <- m1 * s + 1 - u/2 R2set.shrunk <- 1 - (1-Rset) * ((v+u)/v)^s L <- 1-Rset L1s <- L^(-1/s) Rset.F <- (L1s-1)*(v/u) df.m <- n.obs - max(numy ,(numx+numz)) -(numx + numy +3)/2 s1 <- sqrt((numx ^2 * numy^2 -4)/(numx^2 + numy^2-5)) #see cohen p 321 if(numx^2*numy^2 < 5) s1 <- 1 df.v <- df.m * s1 + 1 - numx * numy/2 #which is just v # df.v <- (u+v) #adjusted for bias to match the CCAW results #Rset.F <- Rset.F * (u+v)/v Chisq <- -(n.obs - 1 -(numx + numy +1)/2)*log((1-cc2)) } # if(numx == 1) {beta <- beta * sqrt(diag(C)[y]) # } else {beta <- t(t(beta) * sqrt(diag(C)[y]))/sqrt(diag(xc.matrix))} #this puts the betas into the raw units # coeff <- data.frame(beta=beta,se = se,t=tvalue, Probabilty=prob) # colnames(coeff) <- c("Estimate", "Std. Error" ,"t value", "Pr(>|t|)") x.inv <- solve(C[x,x]) yhat <- t(C[x,y,drop=FALSE]) %*% x.inv %*% (C[x,y]) resid <- C[y,y] - yhat #this is now in covariance units Vaxy <- as.matrix(Vaxy) if(is.null(n.obs)) {set.cor <- list(coefficients=beta,R=sqrt(R2),R2=R2,Rset=Rset,T=T,cancor = cc, cancor2=cc2, raw=raw,residual=resid,SE.resid=Residual.se,df=c(k,df),ruw=ruw,Ruw=Ruw, x.matrix=C[x,x],y.matrix=C[y,y], VIF=VIF,z=z,std=std,Vaxy=Vaxy,Xmat=Lmat, Ymat=Mmat, Call = cl) } else { set.cor <- list(coefficients=beta,se=se,t=tvalue,Probability = prob,ci=confid.beta, R=sqrt(R2),R2=R2,shrunkenR2 = shrunkenR2,seR2 = SE,F=F,probF=pF,df=c(k,df), SE.resid=Residual.se,Rset=Rset,Rset.shrunk=R2set.shrunk,Rset.F=Rset.F,Rsetu=u, Rsetv=df.v,T=T,cancor=cc,cancor2 = cc2,Chisq = Chisq,raw=raw,residual=resid,ruw=ruw, Ruw=Ruw,x.matrix=C[x,x],y.matrix=C[y,y],VIF=VIF,z=z,data=data,std = std,Vaxy=Vaxy,Xmat=Lmat, Ymat=Mmat,Call = cl) } class(set.cor) <- c("psych","lmCor") if(plot) lmDiagram(set.cor,main=main,show=show) return(set.cor) } #modified July 12,2007 to allow for NA in the overall matrix #modified July 9, 2008 to give statistical tests #modified yet again August 15 , 2008 to convert covariances to correlations #modified January 3, 2011 to work in the case of a single predictor #modified April 25, 2011 to add the set correlation (from Cohen) #modified April 21, 2014 to allow for mixed names and locations in call #modified February 19, 2015 to just find the covariances of the data that are used in the regression #this gets around the problem that some users have large data sets, but only want a few variables in the regression #corrected February 17, 2018 to correctly find the unweighted correlations #corrected December 12, 2020 to correctly report the se of the intercept as well as the residual.se #I had been been off by sqrtdf/(n.obs-1) #mdified Sept 22, 2018 to allow cex and l.cex to be set #mdified November, 2017 to allow an override of which way to draw the arrows #modifed May 1, 2023 to allow for drawing lm output as well. lmDiagram <- lmCor.diagram <- setCor.diagram <- function(sc,main="Regression model",digits=2,show=FALSE,cex=1,l.cex=1,...) { if(missing(l.cex)) l.cex <- cex beta <- round(sc$coefficients,digits) #two cases #the normal case, the model comes from lmCor #the 2nd case, the model comes from lm if(length(class(sc)) ==1) {lm<- TRUE} else { lm <- FALSE} if(!lm) {#case 1 if(rownames(beta)[1] %in% c("(Intercept)", "intercept*")) {intercept <- TRUE} else {intercept <- FALSE} x.matrix <- round(sc$x.matrix,digits) y.matrix <- round(sc$y.matrix,digits) y.resid <- round(sc$resid,digits) x.names <- rownames(beta) if(intercept){ x.matrix <- x.matrix[-intercept,-intercept] x.names <- x.names[-intercept] beta <- beta[-intercept,,drop=FALSE] } y.names <- colnames(beta) } else { x.matrix <- round(cov(sc$model[-1]), digits=digits) #case 2 from lmkl x.names <- colnames(sc$model)[-1] beta <- as.matrix(beta[-1]) #make the vector into a matrix y.names <- colnames(sc$model)[1] } nx <- length(x.names) ny <- length(y.names) top <- max(nx,ny) xlim=c(-nx/3,10) ylim=c(0,top) top <- max(nx,ny) x <- list() y <- list() var.list <- arrow.list <- curve.list <- self.list<- list() x.scale <- top/(nx+1) y.scale <- top/(ny+1) plot(NA,xlim=xlim,ylim=ylim,main=main,axes=FALSE,xlab="",ylab="") for(i in 1:nx) {x[[i]] <- dia.rect(3,top-i*x.scale,x.names[i],cex=cex,draw=TRUE,...) var.list <- c(var.list,x.names[i],x[[i]])} for (j in 1:ny) {y[[j]] <- dia.rect(7,top-j*y.scale,y.names[j],cex=cex,draw=TRUE,...) var.list <- c(var.list,y.names[j],y[[j]]) } for(i in 1:nx) { for (j in 1:ny) { d.arrow <- dia.arrow(x[[i]]$right,y[[j]]$left,labels = beta[i,j],adj=4-j,cex=l.cex,draw=FALSE,...) arrow.list <- c(arrow.list,d.arrow) } } if(nx >1) { for (i in 2:nx) { for (k in 1:(i-1)) {dca <- dia.curved.arrow(x[[i]]$left,x[[k]]$left,x.matrix[i,k],scale=-(abs(i-k)),both=TRUE,dir="u",cex = l.cex,draw=FALSE,...) curve.list <- c(curve.list,dca)} #dia.curve(x[[i]]$left,x[[k]]$left,x.matrix[i,k],scale=-(abs(i-k))) } } } if(ny >1) {for (i in 2:ny) { for (k in 1:(i-1)) {dca <- dia.curved.arrow(y[[i]]$right,y[[k]]$right,y.resid[i,k],scale=(abs(i-k)),dir="u",cex=l.cex,draw=FALSE, ...) curve.list <- c(curve.list,dca)} }} for(i in 1:ny) {d.self <- dia.self(y[[i]],side=3,scale=.2,draw=FALSE,... ) self.list <- c(self.list,d.self)} if(show) {text((10-nx/3)/2,0,paste("Unweighted matrix correlation = ",round(sc$Ruw,digits)))} #now draw all the arrows at once multi.rect(var.list,...) multi.arrow(arrow.list,...) multi.curved.arrow(curve.list,...) multi.self(self.list,...) } print_psych.lmCor <- function(x,digits=2) { cat("Call: ") print(x$Call) if(x$raw) {cat("\nMultiple Regression from raw data \n")} else { cat("\nMultiple Regression from matrix input \n")} ny <- NCOL(x$coefficients) for(i in 1:ny) {cat("\n DV = ",colnames(x$coefficients)[i],"\n") # if(!is.na(x$intercept[i])) {cat(' intercept = ',round(x$intercept[i],digits=digits),"\n")} if(!is.null(x$se)) {result.df <- data.frame( round(x$coefficients[,i],digits),round(x$se[,i],digits),round(x$t[,i],digits),signif(x$Probability[,i],digits),round(x$ci[,i],digits), round(x$ci[,(i +ny)],digits),round(x$VIF,digits)) colnames(result.df) <- c("slope","se", "t", "p","lower.ci","upper.ci", "VIF") print(result.df) result.df <- data.frame(R = round(x$R[i],digits), R2 = round(x$R2[i],digits), Ruw = round(x$ruw[i],digits),R2uw = round( x$ruw[i]^2,digits), round(x$shrunkenR2[i],digits),round(x$seR2[i],digits), round(x$F[i],digits),x$df[1],x$df[2], signif(x$probF[i],digits+1)) colnames(result.df) <- c("R","R2", "Ruw", "R2uw","Shrunken R2", "SE of R2", "overall F","df1","df2","p") cat("\n Multiple Regression\n") print(result.df) } else { result.df <- data.frame( round(x$beta[,i],digits),round(x$VIF,digits)) colnames(result.df) <- c("slope", "VIF") print(result.df) result.df <- data.frame(R = round(x$R[i],digits), R2 = round(x$R2[i],digits), Ruw = round(x$Ruw[i],digits),R2uw = round( x$Ruw[i]^2,digits)) colnames(result.df) <- c("R","R2", "Ruw", "R2uw") cat("\n Multiple Regression\n") print(result.df) } } }psych/R/bi.bars.R0000644000176200001440000000465713210634733013262 0ustar liggesusersbi.bars <- function(x,var=NULL, grp=NULL,horiz,color,label=NULL,zero=FALSE,xlab,ylab,...) { if(missing(horiz)) horiz <- TRUE if(missing(color)) color <- c("blue","red") #new way is cleaner if(!is.null(var) & (length(var)==1)) {if(missing(ylab) & (length(var) ==1)) {ylab <- var} x <- x[,c(var,grp) , drop = FALSE] if(is.null(grp)) {stop("I am stopping. The grouping variable was not specified")} grp <- x[,grp,drop=FALSE] x <- as.numeric( x[,var]) } else {grp <- var} #the old way if(horiz) { if(missing(xlab)) xlab <- "Frequency" if(missing(ylab)) ylab <- paste0(levels(grp)) } else { if(missing(ylab)) ylab <- "Frequency" if(missing(xlab)) xlab <- paste0(levels(grp))} groups <- table(grp) max.val <- max(x,na.rm=TRUE) min.val <- min(x,na.rm=TRUE) #gr1 <- as.numeric(names(groups)[1]) #gr2 <- as.numeric(names(groups)[2]) gr1 <- (names(groups)[1]) gr2 <- (names(groups)[2]) g1 <- subset(x,grp==gr1) g2 <- subset(x,grp==gr2) t1 <- tabulate(g1-min.val*zero,nbins=(max.val-min.val+1)) t2 <- tabulate(g2-min.val*zero,nbins=(max.val-min.val+1)) #t1 <- table(g1-min.val*zero) #t2 <- table(g2-min.val*zero) m1 <- max(t1,t2) m2 <- max(t1,t2) xlim <- c(-m1,m2)*1.04 if(horiz) { #t1 <- t1[t1 > 0] xloc <- barplot(-t1,xlim=xlim,col=color[1],horiz=horiz,xlab=xlab,ylab=ylab,axes=FALSE,axisnames=FALSE,...) barplot(t2,add=TRUE,col=color[2],horiz=horiz,axes=FALSE,axisnames=FALSE,...) box() if((max.val - min.val) < 10) { if(is.null(label)) {axis(2,at=xloc+min.val*zero,labels=min.val:max.val,...)} else { axis(2,at=xloc+min.val*zero,labels=label,las=2,...)}} else { at <- axTicks(2,usr=c(min.val,max.val)) axis(2,at=at,labels=at + min.val*zero,las=2,...)} atv <- axTicks(1) axis(1,at=atv,labels=abs(atv),...)} else { #the vertical case ylim <- c(-m1,m2)*1.04 xloc <- barplot(-t1,ylim=ylim,col=color[1],horiz=horiz,xlab=xlab,ylab=ylab,axes=FALSE,...) barplot(t2 ,add=TRUE,col=color[2],horiz=horiz,axes=FALSE,...) box() atv <- axTicks(2) axis(2,at=atv,labels=abs(atv),las=2,...) if((max.val - min.val) < 10) { if(is.null(label)) {axis(1,at=xloc,labels=min.val:max.val,...)} else { axis(1,at=xloc,labels=label,...)} } else { at <- axTicks(1,usr=c(min.val,max.val)) axis(1,at=at,labels=at+min.val*zero,...) }} } #modified December 2, 2017 to be compatible with densityBy and violinBy syntax psych/R/unidim.r0000644000176200001440000000766614524215576013303 0ustar liggesusers #A number of estimates of unidimensionality #Developed March 9. 2017 #Modified August 3 to consider two more estimates #cleaned up Sept 18, 2023 to match ms #reversed keys and x to be consistent with other functions "unidim" <- function(keys=NULL,x=NULL,cor="cor",correct=.5, check.keys=TRUE) { cl <- match.call() covar <- FALSE use <- "pairwise" if(!is.null(x)) {n.keys <- length(keys) } else {x <- keys keys <- NULL n.keys <- 1} all.x <- x results <- list() for(scale in 1:n.keys) { if(!is.null(keys)) { select <- keys[[scale]] flipper <- rep(1,length(select)) flipper[grep("-",select)] <- -1 if(is.numeric(select)) {select <- abs(select) } else { select <- sub("-","",unlist(select)) } if(isCorrelation(all.x)) {x <- all.x[select,select] } else {x <- all.x[,select]} } else {flipper <- rep(1,ncol(x))} #this allows us to handle multiple scales if(!isCorrelation(x) ) { switch(cor, cor = { x <- cor(x,use=use) covar=FALSE}, cov = {x <- cov(x,use=use) covar <- TRUE}, spearman = {x <- cor(x,use=use,method="spearman")}, kendall = {x <- cor(x,use=use,method="kendall")}, tet = {x <- tetrachoric(x,correct=correct)$rho}, tetrachoric = {x <- tetrachoric(x,correct=correct)$rho}, poly = {x <- polychoric(x,correct=correct)$rho}, polychoric = {x <- polychoric(x,correct=correct)$rho}, mixed = {x <- mixedCor(x,use=use,correct=correct)$rho} )} #X is now a correlation matrix or a covariance matrix f1 <- fa(x,covar=covar) # a one factor solution g <- sum(f1$model) # sum(f1$loadings %*% t(f1$loadings)) n <- NCOL(x) Vt <- sum(x) om.g <- g/Vt #model/ r om.t <- (Vt - sum(f1$uniqueness))/Vt #total reliability uni.orig <- g/ (Vt - sum(f1$uniqueness)) #raw unidimensionality #now, to find traditional alpha, we need to flip negative items if(check.keys | n.keys == 1) { flipper <- rep(1,n) flipper[sign(f1$loadings ) < 0] <- -1 } x <- diag(flipper) %*% x %*% diag(flipper) Vt <- sum(x) median.r <- median(x[lower.tri(x)]) #lower tri does not count diagonal alpha.std <- (1- tr(x)/Vt)*(n/(n-1)) av.r <- (Vt-tr(x))/(n*(n-1)) omega.flip <- sum(diag(flipper) %*% f1$model %*% diag(flipper))/Vt omega.total.flip <- (Vt - sum(f1$uniqueness))/Vt flipperped.loadings <- flipper * f1$loadings g.flipperped <- sum(flipperped.loadings%*% t(flipperped.loadings)) uni.flipper <- g.flipperped/(Vt - sum(f1$uniqueness)) # How well does the alpha model predict the correlation matrix? alpha.res <- sum(lower.tri(x)* (x-av.r)^2)/sum(lower.tri(x) * x^2) #squared residual /squared original # uni <- uni.flipper * (1-alpha.res) uni <- f1$fit.off * (1-alpha.res) stats <- list(u=uni,alpha.res=1-alpha.res, fit.off= f1$fit.off, alpha=alpha.std,av.r = av.r,median.r = median.r, uni.flipper = uni.flipper, uni=uni.orig,om.g=om.g, omega.pos = omega.flip,om.t=om.t, om.total.flip= omega.total.flip) if(!is.null(keys)) {results[[names(keys[scale])]]<- stats } else {results <- stats} } temp <- matrix(unlist(results),ncol=12,byrow=TRUE) colnames(temp) <- c("u","tau","con","alpha","av.r","median.r","Unidim.A","Unidim","model","model.A", "total", "total.A") rownames(temp) <- names(keys) if(check.keys) { results <- list(uni=temp[,1:7])} else {results <- list(uni=temp)} results$Call <- cl class(results) <- c("psych","unidim") return(results) } print_psych.unidim <- function(x,digits=2) { cat("\nA measure of unidimensionality \n Call: ") print(x$Call) cat("\nUnidimensionality index = \n" ) print(round(x$uni,digits=digits)) cat("\nunidim adjusted index reverses negatively scored items.") cat("\nalpha "," Based upon reverse scoring some items.") cat ("\naverage and median correlations are based upon reversed scored items") } psych/R/sim.VSS.R0000644000176200001440000000151011124337677013176 0ustar liggesusers"sim.VSS" <- function(ncases=1000,nvariables=16,nfactors=4,meanloading=.5,dichot=FALSE,cut=0) #generates a simple structure factor matrix #with nfactors { weight=sqrt(1-meanloading*meanloading) #loadings are path coefficients theta=matrix(rnorm(ncases*nfactors),nrow=ncases,ncol=nvariables) #generates nfactor independent columns, repeated nvariable/nfactor times) error=matrix(rnorm(ncases*nvariables),nrow=ncases,ncol=nvariables) #errors for all variables items=meanloading*theta+weight*error #observed score = factor score + error score if(dichot) {items <- (items[,1:nvariables] >= cut) items <- items + 0} return(items) } psych/R/irt.2p.R0000644000176200001440000000155112253362153013046 0ustar liggesusers"irt.2p" <- function(delta,beta,items) { #find the person parameters in a 2 parameter model we use deltas and betas from irt.discrim and irt.person.rasch #find the person parameter irt.2par <- function(x,delta,beta,scores) { fit <- -1*(log(scores/(1+exp(beta*(delta-x))) + (1-scores)/(1+exp(beta*(x-delta))))) mean(fit,na.rm=TRUE) } num <- dim(items)[1] fit <- matrix(NaN,num,2) total <- rowMeans(items,na.rm=TRUE) count <- rowSums(!is.na(items)) for (i in 1:num) { if (count[i]>0) {myfit <- optimize(irt.2par,c(-4,4),beta=beta,delta=delta,scores=items[i,]) #how to do an apply? fit[i,1] <- myfit$minimum fit[i,2] <- myfit$objective #fit of optimizing program } else { fit[i,1] <- NA fit[i,2] <- NA } #end if else } #end loop irt.2p <-data.frame(total,theta=fit[,1],fit=fit[,2],count)} psych/R/factor.wls.R0000644000176200001440000002326012456326542014022 0ustar liggesusers"factor.wls" <- function(r,nfactors=1,residuals=FALSE,rotate="varimax",n.obs = NA,scores=FALSE,SMC=TRUE,missing=FALSE,impute="median", min.err = .001,digits=2,max.iter=50,symmetric=TRUE,warnings=TRUE,fm="wls") { cl <- match.call() .Deprecated("fa",msg="factor.wls is deprecated. Please use the fa function with fm=wls.") #this does the WLS or ULS fitting depending upon fm "fit.residuals" <- function(Psi,S,nf,S.inv,fm) { diag(S) <- 1- Psi if(fm=="wls") {if(!is.null(S.inv)) sd.inv <- diag(1/diag(S.inv)) } else {if(!is.null(S.inv)) sd.inv <- ((S.inv))} #gls eigens <- eigen(S) eigens$values[eigens$values < .Machine$double.eps] <- 100 * .Machine$double.eps if(nf >1 ) {loadings <- eigens$vectors[,1:nf] %*% diag(sqrt(eigens$values[1:nf])) } else {loadings <- eigens$vectors[,1] * sqrt(eigens$values[1] ) } model <- loadings %*% t(loadings) #weighted least squares weights by the importance of each variable if(fm=="wls" ) {residual <- sd.inv %*% (S- model)^2 %*% sd.inv} else {if(fm=="gls") {residual <- (sd.inv %*% (S- model))^2 } else {residual <- (S-model)^2 } } # the uls solution usually seems better? diag(residual) <- 0 error <- sum(residual) } #this code is taken (with minor modification to make ULS or WLS) from factanal #it does the iterative calls to fit.residuals "fit" <- function(S,nf,fm) { S.smc <- smc(S) if((fm=="wls") | (fm =="gls") ) {S.inv <- solve(S)} else {S.inv <- NULL} if(sum(S.smc) == nf) {start <- rep(.5,nf)} else {start <- 1- S.smc} res <- optim(start, fit.residuals, method = "L-BFGS-B", lower = .005, upper = 1, control = c(list(fnscale = 1, parscale = rep(0.01, length(start)))), nf= nf, S=S, S.inv=S.inv,fm=fm ) if((fm=="wls") | (fm=="gls")) {Lambda <- FAout.wls(res$par, S, nf)} else { Lambda <- FAout(res$par, S, nf)} result <- list(loadings=Lambda,res=res) } #these were also taken from factanal FAout <- function(Psi, S, q) { sc <- diag(1/sqrt(Psi)) Sstar <- sc %*% S %*% sc E <- eigen(Sstar, symmetric = TRUE) L <- E$vectors[, 1L:q, drop = FALSE] load <- L %*% diag(sqrt(pmax(E$values[1L:q] - 1, 0)), q) diag(sqrt(Psi)) %*% load } FAout.wls <- function(Psi, S, q) { diag(S) <- 1- Psi E <- eigen(S,symmetric = TRUE) L <- E$vectors[,1:q,drop=FALSE] %*% diag(sqrt(E$values[1:q,drop=FALSE]),q) return(L) } ## now start the main function ## now start the main function if((fm !="pa") & (fm !="minres" )& (fm != "gls") & (fm != "wls")) {message("factor method not specified correctly, weighted least squares used") fm <- "wls" } n <- dim(r)[2] if (n!=dim(r)[1]) { n.obs <- dim(r)[1] if(scores) {x.matrix <- r if(missing) { #impute values miss <- which(is.na(x.matrix),arr.ind=TRUE) if(impute=="mean") { item.means <- colMeans(x.matrix,na.rm=TRUE) #replace missing values with means x.matrix[miss]<- item.means[miss[,2]]} else { item.med <- apply(x.matrix,2,median,na.rm=TRUE) #replace missing with medians x.matrix[miss]<- item.med[miss[,2]]} }} r <- cor(r,use="pairwise") # if given a rectangular matrix, then find the correlations first } else { if(!is.matrix(r)) { r <- as.matrix(r)} sds <- sqrt(diag(r)) #convert covariance matrices to correlation matrices r <- r/(sds %o% sds) } #added June 9, 2008 if (!residuals) { result <- list(values=c(rep(0,n)),rotation=rotate,n.obs=n.obs,communality=c(rep(0,n)),loadings=matrix(rep(0,n*n),ncol=n),fit=0)} else { result <- list(values=c(rep(0,n)),rotation=rotate,n.obs=n.obs,communality=c(rep(0,n)),loadings=matrix(rep(0,n*n),ncol=n),residual=matrix(rep(0,n*n),ncol=n),fit=0)} r.mat <- r Phi <- NULL colnames(r.mat) <- rownames(r.mat) <- colnames(r) if(SMC) { if(nfactors < n/2) {diag(r.mat) <- smc(r) } else {if (warnings) message("too many factors requested for this number of variables to use SMC, 1s used instead")} } orig <- diag(r) comm <- sum(diag(r.mat)) err <- comm i <- 1 comm.list <- list() if(fm=="pa") { while(err > min.err) #iteratively replace the diagonal with our revised communality estimate { eigens <- eigen(r.mat,symmetric=symmetric) #loadings <- eigen.loadings(eigens)[,1:nfactors] if(nfactors >1 ) {loadings <- eigens$vectors[,1:nfactors] %*% diag(sqrt(eigens$values[1:nfactors])) } else {loadings <- eigens$vectors[,1] * sqrt(eigens$values[1] ) } model <- loadings %*% t(loadings) new <- diag(model) comm1 <- sum(new) diag(r.mat) <- new err <- abs(comm-comm1) if(is.na(err)) {warning("imaginary eigen value condition encountered in fa,\n Try again with SMC=FALSE \n exiting fa") break} comm <- comm1 comm.list[[i]] <- comm1 i <- i + 1 if(i > max.iter) {if(warnings) {message("maximum iteration exceeded")} err <-0 } } } if((fm == "wls")| (fm=="gls")) { #added May 25, 2009 to do WLS fits uls <- fit(r,nfactors,fm) eigens <- eigen(r) #used for the summary stats result$par <- uls$res loadings <- uls$loadings } # a weird condition that happens with the Eysenck data #making the matrix symmetric solves this problem if(!is.double(loadings)) {warning('the matrix has produced imaginary results -- proceed with caution') loadings <- matrix(as.double(loadings),ncol=nfactors) } #make each vector signed so that the maximum loading is positive - probably should do after rotation #Alternatively, flip to make the colSums of loading positive if (FALSE) { if (nfactors >1) { maxabs <- apply(apply(loadings,2,abs),2,which.max) sign.max <- vector(mode="numeric",length=nfactors) for (i in 1: nfactors) {sign.max[i] <- sign(loadings[maxabs[i],i])} loadings <- loadings %*% diag(sign.max) } else { mini <- min(loadings) maxi <- max(loadings) if (abs(mini) > maxi) {loadings <- -loadings } loadings <- as.matrix(loadings) if(fm == "minres") {colnames(loadings) <- "MR1"} else {colnames(loadings) <- "PA1"} } #sign of largest loading is positive } #added January 5, 2009 to flip based upon colSums of loadings if (nfactors >1) {sign.tot <- vector(mode="numeric",length=nfactors) sign.tot <- sign(colSums(loadings)) loadings <- loadings %*% diag(sign.tot) } else { if (sum(loadings) <0) {loadings <- -as.matrix(loadings)} else {loadings <- as.matrix(loadings)} colnames(loadings) <- "MR1" } #end addition if(fm == "wls") {colnames(loadings) <- paste("WLS",1:nfactors,sep='') } else {if(fm == "gls") {colnames(loadings) <- paste("GLS",1:nfactors,sep='')} else {colnames(loadings) <- paste("MR",1:nfactors,sep='')}} rownames(loadings) <- rownames(r) loadings[loadings==0.0] <- 10^-15 #added to stop a problem with varimax if loadings are exactly 0 model <- loadings %*% t(loadings) f.loadings <- loadings #used to pass them to factor.stats if(rotate != "none") {if (nfactors > 1) { if (rotate=="varimax" | rotate=="quartimax") { rotated <- do.call(rotate,list(loadings)) loadings <- rotated$loadings Phi <- NULL} else { if ((rotate=="promax")|(rotate=="Promax")) {pro <- Promax(loadings) loadings <- pro$loadings Phi <- pro$Phi} else { if (rotate == "cluster") {loadings <- varimax(loadings)$loadings pro <- target.rot(loadings) loadings <- pro$loadings Phi <- pro$Phi} else { if (rotate =="oblimin"| rotate=="quartimin" | rotate== "simplimax") { if (!requireNamespace('GPArotation')) {warning("I am sorry, to do these rotations requires the GPArotation package to be installed") Phi <- NULL} else { ob <- do.call(rotate,list(loadings) ) loadings <- ob$loadings Phi <- ob$Phi} } }}} }} #just in case the rotation changes the order of the factors, sort them #added October 30, 2008 if(nfactors >1) { ev.rotated <- diag(t(loadings) %*% loadings) ev.order <- order(ev.rotated,decreasing=TRUE) loadings <- loadings[,ev.order]} rownames(loadings) <- colnames(r) if(!is.null(Phi)) {Phi <- Phi[ev.order,ev.order] } #January 20, 2009 but, then, we also need to change the order of the rotation matrix! class(loadings) <- "loadings" if(nfactors < 1) nfactors <- n result <- factor.stats(r,loadings,Phi,n.obs) #do stats as a subroutine common to several functions result$communality <- round(diag(model),digits) result$uniquenesses <- round(diag(r-model),digits) result$values <- round(eigens$values,digits) result$loadings <- loadings if(!is.null(Phi)) {result$Phi <- Phi} if(fm == "pa") result$communality.iterations <- round(unlist(comm.list),digits) if(scores) {result$scores <- factor.scores(x.matrix,loadings) } result$factors <- nfactors result$fn <- "factor.wls" result$Call <- cl class(result) <- c("psych", "fa") return(result) } #modified October 30, 2008 to sort the rotated loadings matrix by the eigen values. psych/R/cluster.plot.R0000644000176200001440000001026714444576301014400 0ustar liggesusers#revised Sept 16, 2013 to give control over the positon (pos) and size (cex) of the labels #revised June 21, 2016 to allow naming of the points "cluster.plot" <- function(ic.results,cluster=NULL,cut = 0.0,labels=NULL, title="Cluster plot",pch=18,pos,show.points=TRUE,choose=NULL,...) { if (!is.matrix(ic.results) ) {if (!is.null(class(ic.results)) ) { if(inherits(ic.results[1],"kmeans")) { load <- t(ic.results$centers) } else { load <-ic.results$loadings} }} else {load <- ic.results} if(!is.null(choose)) load <- load[,choose,drop=FALSE] nc <- dim(load)[2] nvar <- dim(load)[1] #defined locally, so as to be able to pass a parameter to it "panel.points" <- function (x, y, pch = par("pch"), ...) { ymin <- min(y) ymax <- max(y) xmin <- min(x) xmax <- max(x) ylim <- c(min(ymin,xmin),max(ymax,xmax)) xlim <- ylim if(show.points) points(x, y, pch = pch,ylim = ylim, xlim= xlim,...) text(x,y,vnames,...) } if(missing(pos)) pos <- rep(1,nvar) #this allows more control over plotting ch.col=c("black","blue","red","gray","black","blue","red","gray") if (is.null(cluster)) { cluster <- rep(nc+1,nvar) cluster <- apply( abs(load) ,1,which.max) cluster[(apply(abs(load),1,max) < cut)] <- nc+1 } if (nc > 2 ) { vnames <- labels #global variable pairs(load,pch = cluster+pch,col=ch.col[cluster],bg=ch.col[cluster],main=title,lower.panel=panel.points,upper.panel=panel.points,...) } else { if(show.points) { plot(load,pch = cluster+pch,col=ch.col[cluster],bg=ch.col[cluster],main=title,...) } else { plot(load,pch = cluster+pch,col=ch.col[cluster],bg=ch.col[cluster],main=title,type="n",...) pos=NULL} abline(h=0) abline(v=0)} if(is.null(labels)) labels <- paste(1:nvar) if(nc < 3) text(load,labels,pos=pos,...) } "factor.plot" <- function(ic.results,cluster=NULL,cut = 0.0,labels=NULL, title,jiggle=FALSE,amount=.02,pch=18,pos,show.points=TRUE,...) { #deprecated fa.plot(ic.results,cluster=cluster,cut=cut,labels=labels,title=title,jiggle=jiggle,amount=amount,pch=pch,pos=pos,show.points=show.points,...) } "fa.plot" <- function(ic.results,cluster=NULL,cut = 0.0,labels=NULL, title,jiggle=FALSE,amount=.02,pch=18,pos,show.points=TRUE,choose=NULL,main=NULL,...) { if(missing(title) ) { title="Plot" if (length(class(ic.results)) >1 ) {if (inherits(ic.results, "fa")) {title = "Factor Analysis"} else { if (inherits(ic.results,"principal")) {title = "Principal Component Analysis"} } } } if(missing(main)) {main<- title} else {title <- main} #getting rid of confusion from years ago if (!is.matrix(ic.results)) { if (!is.null(class(ic.results))) { if(inherits(ic.results, "kmeans")) { load <- t(ic.results$centers) } else { load <-ic.results$loadings} }} else {load <- ic.results} if(is.null(colnames(load))) colnames(load) <- paste("F",1:ncol(load),sep="") if(!is.null(choose)) load <- load[,choose,drop=FALSE] nc <- dim(load)[2] nvar <- dim(load)[1] if(missing(pos)) pos <- rep(1,nvar) #this allows more control over plotting ch.col=c("black","blue","red","gray","black","blue","red","gray") if (is.null(cluster)) { cluster <- rep(nc+1,nvar) cluster <- apply( abs(load) ,1,which.max) cluster[(apply(abs(load),1,max) < cut)] <- nc+1 } #define this function inside the bigger function so that vnames is globabl to it "panel.points" <- function (x, y, pch = par("pch"), ...) { ymin <- min(y) ymax <- max(y) xmin <- min(x) xmax <- max(x) ylim <- c(min(ymin,xmin),max(ymax,xmax)) xlim <- ylim if(show.points) points(x, y, pch = pch,ylim = ylim, xlim= xlim,...) text(x,y,vnames,...) } if(jiggle) load <- jitter(load,amount=amount) if (nc > 2 ) { vnames <- labels # pairs(load,pch = cluster+pch,col=ch.col[cluster],bg=ch.col[cluster],lower.panel=panel.points,upper.panel = panel.points,main=title,...) } else { if(show.points) { plot(load,pch = cluster+pch,col=ch.col[cluster],bg=ch.col[cluster],main=title,...) } else { plot(load,pch = cluster+pch,col=ch.col[cluster],bg=ch.col[cluster],main=title,type="n",...) pos=NULL} abline(h=0) abline(v=0) if(is.null(labels)) labels <- paste(1:nvar) text(load,labels,pos=pos,...)} } psych/R/crossValidation.r0000644000176200001440000001252414526765101015143 0ustar liggesusers"crossValidation" <- function(model,data, options=NULL,select=NULL) { cl <- match.call() obnames <- cs(lmCor,bestScales ) value <- inherits(model, obnames, which=TRUE) if (any(value > 1)) {value <- obnames[which(value >0)]} else {value <- "none"} switch(value, lmCor = {wt <- model$coefficients if(rownames(wt)[1] =="(Intercept)") wt <- wt[-1,,drop=FALSE] }, bestScales ={#the default if(is.null(options)){options <- "best.keys"} switch(options, best.keys = { keys <- model$best.keys select <- c(names(keys),selectFromKeys(keys)) data <- data[,select] nvar <- ncol(data) wt <- make.keys(data,keys) keys <- model$best.keys }, weights = {wt <- model$weights}, optimal.keys ={ keys <- model$optimal.keys select <- c(names(keys),selectFromKeys(keys)) data <- data[,select] nvar <- ncol(data) wt <- make.keys(data,keys) }, optimal.weights ={ wt <- model$optimal.weights} ) }, none = {wt<- model} #from lmCof ) nvar <- nrow(wt) predictors <- rownames(wt) criteria <- colnames(wt) vars <- c(criteria,predictors) npred <- length(predictors) ncrit <-length(criteria) if(isCorrelation(data)) {raw <- FALSE R <- data[vars,vars] } else {raw <- TRUE data <- data[,vars, drop=FALSE ] #select just the variables in the model R <- cor(data,use="pairwise") } #this is a matrix of the (criteria + predictors) cv <- rep(NA,ncrit) Mij <- matrix(NA,ncrit,ncrit) for(i in 1:ncrit) {y <- wt[,i,drop=FALSE] wtm <- diag(wt[,i]) Cxx <- wtm %*% R[predictors,predictors] %*% wtm Vxx <- sum(Cxx) Cxyi <- wt[,i,drop=FALSE] * R[i,predictors] Cxxy <- wt[,i] *t(R[1:ncrit,predictors,drop=FALSE]) cv[i]<- sum(Cxyi)/sqrt(Vxx) #Cxyi <- (Cxyi)/sqrt(Vxx) Mij[,i] <- colSums( Cxxy)/sqrt(Vxx) } Mij <- t(Mij) names(cv) <- criteria #now do the raw correlation approach if(raw) {x.data <- scale(data[,predictors]) pred <- matrixMult.na(x.data,wt,scale = FALSE) item.R <- cor(pred,data[,criteria],use="pairwise") colnames(Mij) <- rownames(Mij)<-colnames(item.R) cv.items <-diag(item.R) crossValid <- data.frame(items = cv.items, mat = cv)} else { crossValid <- cv item.R <- NULL #fix the print function to not show these } nvars <- length(vars) result <- list(crossV = crossValid,nvars=npred,item.R = item.R,mat.R = Mij,Call =cl) class(result) <- c("psych","crossV") return(result) } #small function to call matplot with labels matPlot <- function(x, type = "b", minlength=6, xlas=0,legend=NULL,lab=NULL,pch=16,col=1:6,lty=NULL,...) { rownames(x)<- abbreviate(rownames(x),minlength=minlength) nvar <- NCOL(x) if(missing(pch)) pch <- seq(15,(15+nvar)) if(missing(lty)) lty <- 1:8 matplot(x,type=type,xaxt="n",pch=pch,col=col,lty=lty,...) #now add the labels at1 <- (1:nrow(x)) labx <- (rownames(x)) axis(1,at=at1,labels=labx,las=xlas) if(!is.null(legend)) { lab <- colnames(x) legend.location <- c("bottomright", "bottom", "bottomleft", "left", "topleft", "top", "topright", "right", "center","none") legend(legend.location[legend],legend=lab,lty=lty,pch = pch,col=col)} } #Fixed 9/9/20 to handle case of single criterion (by using drop=FALSE in appropriate places) #added 8/1/22 to do multiple cross validations using bootstrap resampling crossValidationBoot <- function(y,x,data,n.iter=100) { #allow formulat input #first, see if they are in formula mode orig.y <- z <- NULL if(inherits(y,"formula")) { orig.y <- y ps <- fparse(y) y <- ps$y x <- ps$x med <- ps$m #but, mediation is not done here, so we just add this to x # if(!is.null(med)) x <- c(x,med) #not necessary, because we automatically put this in prod <- ps$prod z <- ps$z #do we have any variable to partial out ex <- ps$ex if(!is.null(ex)) message("I am sorry, cross validation of product terms has not been implemented") } summary <- matrix(NA,nrow=n.iter,ncol=2*length(y)) coeff <- list() yn <- colnames(data[y]) cl <- match.call() n.obs <- NROW(data) for (i in 1:n.iter) {ss <- sample(n.obs,n.obs,replace=TRUE) #the bootstrap sample if(!is.null(orig.y)) {y <- orig.y} model <- lmCor(y=y,x=x,data=data[ss,], plot=FALSE) if(!is.null(z)) { rownames(model$coefficients) <- gsub("\\*","",rownames(model$coefficients)) colnames(model$coefficients) <- gsub("\\*","",colnames(model$coefficients))} cross <- crossValidation(model,data=data[-ss,]) #the hold out sample summary[i,] <- c(model$R,diag(cross$item.R)) coeff[[i]] <- model$coefficients } if(length(y) >1 ) {colnames(summary) <- c(paste0("Derivation-",yn),paste0("Cross-",yn))} else { colnames(summary) <- c("Derivation", "Cross.validation")} coeff <- matrix(unlist(coeff), ncol=length(model$coefficients),byrow=TRUE) if (length(y) > 1) {colnames(coeff) <- paste(rep(yn,each=NROW(model$coefficients)), rownames(model$coefficients))} else { colnames(coeff) <- rownames(model$coefficients)} result <- list(mean.fit = colMeans(summary),mean.coeff=colMeans(coeff), trials = summary, coefficients = coeff,Call=cl) class(result) <- c("psych","crossV") return(result) } psych/R/fa.multi.R0000644000176200001440000001356614471272251013463 0ustar liggesusers"fa.multi" <- function(r,nfactors=3,nfact2=1,n.obs = NA,n.iter=1,rotate="oblimin",scores="regression", residuals=FALSE,SMC=TRUE,covar=FALSE,missing=FALSE,impute="median", min.err = .001,max.iter=50,symmetric=TRUE,warnings=TRUE,fm="minres",alpha=.1, p =.05,oblique.scores=FALSE,np.obs=NULL,use="pairwise",cor="cor",...) { cl <- match.call() if(nfactors < 2) stop("number of lower level factors must be at least 2") f1 <- fa(r=r,nfactors=nfactors,n.obs=n.obs,rotate=rotate,scores=scores,residuals=residuals,SMC = SMC,covar=covar,missing=missing,impute=impute,min.err=min.err,max.iter=max.iter,symmetric=symmetric,warnings=warnings,fm=fm,alpha=alpha,oblique.scores=oblique.scores,np.obs=np.obs,use=use,cor=cor, ...=...) #call fa with the appropriate parameters f2 <- fa(f1$Phi,nfactors=nfact2,rotate=rotate,fm=fm) result <- list(f1=f1,f2=f2) class(result) <- c("psych","fa.multi") return(result) } #based upon the omegaDiagram function #revised 7/23/23 to properly put in labels for first and second stratum "fa.multi.diagram" <- function(multi.results,sort=TRUE,labels=NULL,flabels=NULL,f2labels=NULL,cut=.2,gcut=.2,simple=TRUE,errors=FALSE, digits=1,e.size=.1,rsize=.15,side=3,main=NULL,cex=NULL,color.lines=TRUE ,marg=c(.5,.5,1.5,.5),adj=2, ...) { if(color.lines) { colors <- c("black","red")} else {colors <- c("black","black") } Phi <- NULL #the default case if(is.null(cex)) cex <- 1 old.par<- par(mar=marg) #give the window some narrower margins on.exit(par(old.par)) #set them back if(sort) multi.results$f1 <- fa.sort(multi.results$f1) factors <- as.matrix(multi.results$f1$loadings) if(is.null(main)) {main <- "Hierarchical (multilevel) Structure" } gloading <- multi.results$f2$loading nvar <- num.var <- dim(factors)[1] #how many variables? num.factors <- dim(factors)[2] num.2level <- dim(gloading)[2] e.size <- e.size * 10/ nvar #this is an arbitrary setting that seems to work #first some basic setup parameters vars <- paste("V",1:num.var,sep="") if (!is.null(labels)) {vars <- paste(labels)} else{vars <- rownames(factors) } if(!is.null(flabels)) {fact <- flabels} else { fact <- paste0("F",1:num.factors,sep="") } # e.g. "F'1" "F2" "F3" rownames(factors)<- vars colnames(factors)[1:length(fact)] <- fact if(!is.null(f2labels)) {fact2<- f2labels} else { fact2 <- paste0("g",1:num.2level,sep="")} colnames(gloading) <- fact2 colnames(multi.results$f2$loadings) <- fact2 arrows.list <- list() var.rect <- list() fact.rect <- list() max.len <- max(nchar(rownames(factors)))*rsize cex <- min(cex,40/nvar) xleft <- -max.len/2 #0# xright <- nvar + 1 # or hard code to 3? plot(0,type="n",xlim=c(xleft-max.len,xright+1),ylim=c(1,nvar+1),frame.plot=FALSE,axes=FALSE,ylab="",xlab="",main=main) vloc <- xleft gloc <- xright grouploc <- (xright)/2 start <- 0 end <- num.factors for (v in 1:nvar) { var.rect[[v]] <- dia.rect(vloc,nvar-v+1,rownames(factors)[v],xlim=c(0,nvar),ylim=c(0,nvar),cex=cex,draw=FALSE,...) } f.scale <- (nvar+ 1)/(num.factors+1) f.shift <- nvar/num.factors for (f in 1:num.factors) { fact.rect[[f]] <- dia.ellipse(grouploc,(num.factors+1-f)*f.scale,colnames(factors)[f+start],xlim=c(0,nvar),ylim=c(0,nvar),e.size=e.size,draw=FALSE,...) for (v in 1:nvar) { if (abs(factors[v,f+start]) > cut) {arrows.list <- c(arrows.list, dia.arrow(from=fact.rect[[f]],to=var.rect[[v]]$right,col=colors[((sign(factors[v,f+start])<0) +1)],lty=((sign(factors[v,f+start])<0)+1),labels=round(factors[v,f+start],digits),adj=f %% adj +1,draw=FALSE) ) } } } g.ellipse <- list() f.2scale <- (num.var+1)/(num.2level +1) for(g in 1:num.2level) { g.ellipse[[g] ]<- dia.ellipse(gloc,(num.2level+1-g)*f.2scale,colnames(gloading)[g],xlim=c(0,nvar),ylim=c(0,nvar),e.size=e.size,draw=FALSE,...) for (f in 1:num.factors) { if (abs(gloading[f,g]) > cut) { arrows.list <- c(arrows.list, dia.arrow(from=g.ellipse[[g]],to=fact.rect[[f]],col=colors[((sign(gloading[f,g])<0) +1)],lty=((sign(gloading[f,g])<0) +1),labels=round(gloading[f,g],digits),adj=f %% adj +1,draw=FALSE) ) }} } if (errors) {for (v in 1:nvar) { dia.self(location=var.rect[[v]],scale=.5,side=side) } } ##now draw everything at once tv <- matrix(unlist(var.rect),nrow=nvar,byrow=TRUE) all.rects.x <- tv[,5] #the center of the figure all.rects.y <- tv[,2] all.rects.names <- rownames(factors) dia.rect(all.rects.x, all.rects.y,all.rects.names) # multi.rect(var.rect,...) fv <- matrix(unlist(fact.rect),nrow=num.factors,byrow=TRUE) fv2 <- matrix(unlist(g.ellipse), byrow=TRUE,nrow=num.2level) fv <- rbind(fv,fv2) all.rects.x <- fv[,5] #the center of the figure all.rects.y <- fv[,2] all.rects.names <- c(colnames(factors),colnames(multi.results$f2$loadings)) dia.multi.ellipse(all.rects.x,all.rects.y, all.rects.names) # tv2 <- matrix(unlist(g.ellipse), byrow=TRUE,nrow=num.2level)) # # # now show all the loadings as arrows # tv <- matrix(unlist(arrows.list),byrow=TRUE,nrow=num.factors) # cname<- colnames(tv) # tv <- data.frame(tv) # tv[,c(1:18,20)] <- nchar2numeric(tv[,c(1:18,20)]) # colnames(tv) <- cname # text(tv[,1],tv[,2],tv[,3],cex=tv[,4]) # len1 <- as.numeric(tv[1,9]) # len2 <- as.numeric(tv[1,16]) # dia.arrow(x0=tv[,5],y0=tv[,6],x1=tv[,7],y1=tv[,8],length=len1,angle=30,code=1,col=tv[,19],lty=tv[,20],...) # dia.arrow(x0=tv[,12],y0=tv[,13],x1=tv[,14],y1=tv[,15],length=len2,angle=30,code=2,col=tv[,19],lty=tv[,29],...) multi.arrow(arrows.list,...) # } psych/R/cluster2keys.R0000644000176200001440000000043413573007261014370 0ustar liggesusers"cluster2keys" <- function(c) { if (inherits(c, "kmeans")) {c <- c$cluster p <- max(c) v <- length(c) keys <- matrix(rep(0,p*v),ncol=p) for (i in 1:v) {keys[i,c[i]] <- 1} } else {if(length(class(c)) >1) {if (inherits(c,"iclust")) {keys <- factor2cluster(c)} }} return(keys) } psych/R/sim.multilevel.R0000644000176200001440000001307214200252110014663 0ustar liggesuserssim.multilevel <- function(nvar=9,ngroups=4,ncases=16,rwg,rbg,eta) { e.wg <- eigen(rwg) v.wg <- pmax(e.wg$values,0) etabg <- sqrt(1-eta^2) e.bg <- eigen(rbg) v.bg <- pmax(e.bg$values,0) wg<- matrix(rnorm(nvar*ncases),ncases) wg <- scale(wg) wg <- t(e.wg$vectors %*% sqrt(diag(v.wg)) %*% t(wg)) bg <- matrix(rnorm(nvar*ngroups),ngroups) bg <- scale(bg) bg <- e.bg$vectors %*% sqrt(diag(v.bg)) %*% t(bg) bg <- matrix(rep(bg, (ncases/ngroups)),nrow=ncases,byrow=TRUE) gr <- rep((1:ngroups),(ncases/ngroups)) XY <- wg %*% diag(eta^2) + bg %*% diag(etabg^2) XY <- cbind(gr,XY) colnames(XY) <- c("Group",paste("V",1:nvar,sep="")) result <- list(wg=wg,bg=bg,xy=XY) } #Created January 28, 2017 #meant to simulate a number of within subject multilevel models #Created January 28, 2017 #meant to simulate a number of within subject multilevel models "sim.multi" <- function(n.obs=4,nvar = 2,nfact=2, ntrials=96,days=16,mu=0,sigma=1,fact=NULL,loading=.9,phi=0,phi.i = NULL,beta.i=0,mu.i=0,sigma.i = 1,sin.i=0,cos.i=0,AR1=0,f.i=NULL,plot=TRUE) { if(missing(n.obs)) n.obs=4 X <- list() Xjk <- matrix(NA,ncol=nvar +nfact,nrow=ntrials) if(missing(mu) ) mu <- rep(0,nvar) if(missing(sigma)) sigma <- rep(1,nvar) #if(missing(phi.i)) {phi.i <- phi} if(missing(beta.i)) { beta.i <- matrix(0,ncol=nvar,nrow=n.obs) } else { if(length(beta.i) < n.obs) {beta.i <- matrix(beta.i,ncol=nvar,nrow=n.obs,byrow=TRUE) } } if(missing(mu.i)) mu.i <- matrix(0,ncol=nvar,nrow=n.obs) if(missing(sigma.i)) {sigma.i <- matrix(1,ncol=nvar,nrow=n.obs)} else { if(length(sigma.i) < n.obs) {sigma.i <- matrix(sigma.i,ncol =nvar,nrow=n.obs,byrow=TRUE)} } if(missing(sin.i)) {sin.i <- matrix(0,ncol=nvar,nrow=n.obs)} else { if (length(sin.i) < n.obs) {sin.i <- matrix(sin.i,ncol=nvar,nrow=n.obs,byrow= TRUE) } } if(missing(cos.i)) {cos.i <- matrix(0,ncol=nvar,nrow=n.obs) } else { if (length(cos.i) < n.obs) {cos.i <- matrix(cos.i,ncol=nvar,nrow=n.obs,byrow= TRUE) } } if(missing(AR1)) {AR1 <- matrix(0,ncol=nvar,nrow=n.obs) } else { if (length(AR1) < n.obs) {AR1 <- matrix(AR1,ncol=nfact,nrow=n.obs,byrow= TRUE) } } if(is.null(phi)) {phi <-diag(1,nfact) } else {phi <- matrix(phi,ncol=nfact,nrow=nfact) diag(phi) <- 1} if(!is.null(phi.i)) {if(length(phi.i) < n.obs) {phi.i <- rep(phi.i,n.obs/length(phi.i))} } if(nfact > 1) { if(is.null(fact)) { #these are the group level factor loadings fact <- matrix(0,nrow=nvar,ncol=nfact) # fact[ ] =((( col(fact)+ row(fact)) %% nfact )) * loading # fact[((round(row(fact)/nvar))+1) == col(fact)] <- loading #just works for two factors! for(j in 1:nfact) { #fact[((j-1)*nvar/nfact +1):j*nvar/nfact,j] <- loading fact[((j-1) * nvar/nfact +1):(j*nvar/nfact),j] <- loading } fact<- (fact %*% phi )}} else { fact <- matrix(loading,ncol=nfact,nrow=nvar) } if(is.null(f.i)) { f.i <- list() for (i in 1:n.obs) { f.i[[i]] <- fact } } trials.day <- ntrials/days hours <- 24/trials.day time <- seq(hours,days * trials.day*hours,hours) t.radian <- time * pi /12 for (i in 1:n.obs) { xij <- rnorm((nvar + nfact),mu,sigma) #between subjects for(j in 1:nfact) { #first generate the factor scores that have a within subject model error <- rnorm(ntrials,mu.i[i,j],sigma.i[i,j]) lagerror <- c(0, error[1:(ntrials-1)]) Xjk[,j] <- xij[j] + mu[j] +beta.i[i,j] *time/ntrials + sin(t.radian)*sin.i[i,j] + cos(t.radian)*cos.i[i,j] + error + AR1[i,j] * lagerror } #factor scores are the first nfact elemements of Xjk #now, generate item scores if(is.null(phi.i)) {phi.ind <- diag(1,nfact) } else {phi.ind <- matrix(phi.i[i],nfact,nfact) diag(phi.ind) <- 1} Xjk[,1:nfact] <- Xjk [,1:nfact] %*% phi.ind #these are the factor scores for the ith subject for(k in 1:nvar) { uniq <- sqrt(1 - sum(f.i[[i]][k,]^2)) #the uniqueness is 1-h2 uniq.err <- rnorm(ntrials,0,uniq) score <- 0 for (j in 1:nfact) { score <- score + Xjk[,j] * f.i[[i]][k,j] #these are orthogonal factor scores * loadings -- can add phi into this } Xjk[,nfact + k ] <- score + uniq.err #these are the variable scores } X[[i]] <- Xjk #save them for this subject } #now, lets summarize what we have done DV <- unlist(X) #This organizes the data with a separate column for each variable for analysis by statsBy dv.a <- array(DV,dim=c(ntrials,nvar+ nfact,n.obs)) dv.m <-NULL for(i in 1:(nvar+nfact)) { dv.m <- cbind(dv.m,as.vector(dv.a[,i,])) } dv.df <- data.frame(dv.m,time = rep(time,n.obs),id=rep(1:n.obs,each=ntrials)) colnames(dv.df)[1:(nfact+nvar)] <- c(paste0("F",1:nfact),paste0("V",1:nvar)) # #However, if we want to plot it, we need to rearrange a bit more if(plot) { IV <- NULL #to get around the problem that RCMD check thinks this is global vars <- c(paste0("F",1:nfact),paste0("V",1:nvar)) select <- rep(NA,nvar * ntrials * n.obs) #we use select to pick out the variables, but leave the factors kount <- ntrials*nfact for(i in 1:n.obs) { select[(1:(ntrials*nvar)+(i-1) * ntrials *nvar)] <- kount + 1:(ntrials*nvar) kount <- kount + ntrials *( nvar+nfact) } vars <- paste0("V",1:nvar) X.df <- data.frame(DV = DV[select], time=rep(time,(n.obs*(nvar))),id = rep(1:n.obs,each=(ntrials*(nvar))),IV = rep(rep(vars,each=ntrials),n.obs) ) plot1<- xyplot(DV ~ time | id, group=IV, data=X.df, type = "b",as.table=TRUE,strip=strip.custom(strip.names=TRUE,strip.levels=TRUE),col=c("blue","red","black","grey")) print(plot1) } invisible(dv.df) #this returns the scores if we want to do further processing } psych/R/ICLUST.diagram.R0000644000176200001440000003646114522031371014342 0ustar liggesusers#modified 6/6/20 to vectorize the labels and rectangles. #modified 8/19/22 to make labels characters (which they are normally) "iclust.diagram" <- function(ic,labels=NULL,short=FALSE,digits=2,cex=NULL,min.size=NULL,e.size=1,colors=c("black","blue"), main="ICLUST diagram", cluster.names = NULL,marg=c(.5,.5,1.5,.5),plot=TRUE,bottomup=TRUE) { old.par<- par(mar=marg) #give the window some narrower margins on.exit(par(old.par)) #set them back clusters <- ic$results #the main table from ICLUST num <- nrow(clusters) num.var <- num+1 if(is.null(cex)) cex <- min(16/num.var,1) if (is.null(labels)) { var.labels <- rownames(ic$loadings)} else {var.labels=labels} if (short) {var.labels <- paste("V",1:num.var,sep="")} if(is.null(var.labels)) {var.labels <- paste("V",1:num.var,sep="")} var.labels <- as.character(var.labels) #added 8/19/22 fixed <- fix.names(ic,var.labels) clusters <- fixed$ic$results max.len <- max(nchar((var.labels))) if(is.null(cluster.names)) cluster.names <- rownames(clusters) #added Sept 2, 2012 names(cluster.names) <- rownames(clusters) length.labels <- max(max.len* .15 * cex,.25*cex) ## nc <- length(ic$size) nvar <- sum(ic$size) last <- dim(clusters)[1] max.size <- max(ic$size) #limx <- c(-length.labels,nvar+2) #for long names and not many variables this is ugly if(nvar < 12) {limx <- c(-max.len*.08 * cex,nvar+2)} else {limx <- c(-length.labels,nvar+2)} #for long names and not many variables this is ugly limy <- c(0,nvar+1) if(nvar < 12) e.size <- e.size * .7 #this is a kludge to make small problems look better if(is.null(min.size)) min.size <- .1 * nvar if(plot) {plot(0,type="n",xlim=limx,ylim=limy,frame.plot=FALSE,axes=FALSE,ylab="",xlab="",main=main) new.max.len <- max(strwidth(var.labels,units="user"))} else {new.max.len =10} if (new.max.len > max.len) {limx <- c(-new.max.len/2,nvar+2) if(plot) plot(0,type="n",xlim=limx,ylim=limy,frame.plot=FALSE,axes=FALSE,ylab="",xlab="",main=main)} top <- num.var done <- 0 rect.list <- list() arrow.list <- list() cluster.list <- list() if (nc==1) {head <- num size <- num.var y.loc <- clusters[head,"size2"] v.loc <- down(clusters,head,size,y.loc,old.head= NULL,old.loc=NULL,min.size=min.size,e.size=e.size,digits=digits,cex=cex,limx=limx,limy=limy,colors=colors,cluster.names=cluster.names,rect.list=rect.list,arrow.list=arrow.list,cluster.list=cluster.list,bottomup=bottomup) rect.list <- c(rect.list$rect.list,v.loc$rect.list) cluster.list <- v.loc$cluster.list arrow.list <- v.loc$arrow.list } else { #the multiple cluster case for(clust in 1:nc) { #size <- ic$size[clust] size <- sum(abs(ic$clusters[,clust])) if (substr(colnames(ic$clusters)[clust],1,1)=="C") { #head <- which(rownames(clusters)==names(ic$size[clust])) head <- which(rownames(clusters)==colnames(ic$clusters)[clust]) cluster <- clusters[head,] y.loc <- clusters[head,"size2"] + done v.loc <- down(clusters,head,size,y.loc,old.head= NULL,old.loc=NULL,min.size=min.size,e.size=e.size,digits=digits,cex=cex,limx=limx,limy=limy,colors=colors,cluster.names = cluster.names,rect.list=rect.list,arrow.list=arrow.list,cluster.list=cluster.list,bottomup=bottomup) rect.list <- v.loc$rect.list cluster.list <- v.loc$cluster.list arrow.list <- v.loc$arrow.list } else {v.name <- names(which(ic$clusters[,clust] ==1)) #the case of a non-clustered variable v.loc <- dia.rect(0,done+.5,v.name,xlim=limx,ylim=limy,cex=cex,draw=FALSE) rect.list <- c(rect.list,v.loc,v.name) } done <- done + size } } #we have gathered the variables, the clusters and the arrows, now show them rect.mat <- matrix(unlist(rect.list),ncol=12,byrow=TRUE) rect.df <- as.data.frame(rect.mat,stringsAsFactors=FALSE) colnames(rect.df ) <- c("left","y","right","right.y","topx","topy", "xbott","botty","centerx","centery","radius","lab") if(plot) { text(as.numeric(rect.df$centerx),as.numeric(rect.df$centery),rect.df$lab,cex=cex) rect(as.numeric(rect.df$left),as.numeric(rect.df$botty),as.numeric(rect.df$right),as.numeric(rect.df$topy)) cluster.mat <- matrix(unlist(cluster.list),ncol=15,byrow=TRUE) cluster.df <- data.frame(cluster.mat,stringsAsFactors=FALSE) cluster.df[c(1:12,14:15)] <- nchar2numeric(cluster.df[c(1:12,14:15)]) colnames(cluster.df ) <- c("left","yl","right","yr","topx","topy","xbott","botty","centerx","centery","link","radius","lab","alpha","beta") rownames(cluster.df) <- cluster.df$lab dia.cluster1(cluster.df,cex=cex,e.size=e.size, digits=digits) arrow.mat <- matrix(unlist(arrow.list),ncol=21,byrow=TRUE) arrow.df <- data.frame(arrow.mat,stringsAsFactors=FALSE) arrow.df[c(1:19,21)] <- nchar2numeric(arrow.df[c(1:19,21)]) tv <- arrow.df text(tv[,1],tv[,2],tv[,3],cex=tv[,5]) arrows(x0=tv[,6],y0=tv[,7],x1=tv[,8],y1=tv[,9],length=tv[1,10],angle=tv[1,11],code=1,col=tv[,20],lty=tv[,21]) arrows(x0=tv[,13],y0=tv[,14],x1=tv[,15],y1=tv[,16],length=tv[1,17],angle=tv[1,18],code=2,col=tv[,20],lty=tv[,21]) } #end of plot sorted.order <- psychTools::dfOrder(data.frame(y=as.numeric(rect.df[,"y"]), lab= rect.df[,"lab"]),ascending=TRUE)[,"lab"] invisible(sorted.order) } #end of iclust.diagram fix.names <- function(ic,var.labels) { var.names <- ic$results[,c(1:2)] max.len <- 0 vn <- dim(var.names)[1] for(i in 1:vn) { vname <- sub("V","",var.names[i,1]) suppressWarnings(vname <- as.numeric(vname) ) if(!is.na(vname) & (vname < 1)) vname <- NA if(!is.na(vname)) {var.names[i,1] <- var.labels[vname] if(max.len < nchar(var.labels[vname])) max.len <- nchar(var.labels[vname]) } vname <- sub("V","",var.names[i,2]) suppressWarnings(vname <- as.numeric(vname) ) if(!is.na(vname) & (vname < 1)) vname <- NA if(!is.na(vname)) {var.names[i,2] <- var.labels[vname] if(max.len < nchar(var.labels[vname])) max.len <- nchar(var.labels[vname]) } } ic$results[,c(1:2)] <- var.names return(list(ic=ic,max.len=max.len)) } "dia.cluster" <- function(x, y = NULL, cluster, link=NA, digits=2,cex = cex,e.size=.6,xlim=c(0,1),ylim=c(0,1),small=FALSE,cluster.names,draw=FALSE) { if(draw) { if(!small){ text(x,y, (cluster.names[rownames(cluster)]),pos=3,cex=cex) text(x,y, substitute(list(alpha) == list(a),list(a=round(cluster[1,"alpha"],digits))),cex=cex) text(x,y, substitute(list(beta) == list(b), list(b=round(cluster[1,"beta"],digits))),cex=cex,pos=1) xs <- dia.ellipse1(x,y,xlim=xlim,ylim=ylim,e.size=e.size,draw=draw )} else { text(x,y, (cluster.names[rownames(cluster)]),cex=cex) xs <- dia.ellipse1(x,y,xlim=xlim,ylim=ylim,e.size=e.size *.75) } } #just save the information for later drawing if (!draw) { xs <- dia.ellipse1(x,y,xlim=xlim,ylim=ylim,e.size=e.size,draw=FALSE ) if(small) {clust.info <- list(cluster= rownames(cluster),alpha=NA, beta = NA)} else { clust.info <- list(cluster= rownames(cluster),alpha=round(cluster[1,"alpha"],digits), beta = round(cluster[1,"beta"],digits)) } } vert <- cex*.3 left <- c(x-xs,y) right <- c(x+xs,y) top <- c(x,y+xs) bottom <- c(x,y-xs) center <- c(x,y) dia.cluster <- list(left=left,right=right,top=top,bottom=bottom,center=center,link=link,radius=xs , clust.info) } #June 27, 2020 revised to allow for faster drawing #this draws all the clusters at once #still a little slow, but better than before # By putting NA at the end of every unit.circle, we can draw multiple circles rapidly #modified 8/21/22 to include "beta" "dia.cluster1" <- function(cluster.df,digits=2,cex = cex,e.size=.6,xlim=c(0,1),ylim=c(0,1)) { big <- cluster.df[!is.na(cluster.df[,"alpha"]),] x.big <- big[,"centerx"] y.big <- big[,"centery"] small <- cluster.df[is.na(cluster.df[,"alpha"]),] x <- cluster.df[,"centerx"] y <- cluster.df[,"centery"] text(x.big,y.big, rownames(big),pos=3,cex=cex) #all clusters have names #these next two lines just report the first values temp.alpha <- substitute(list(alpha) == "") temp.beta <- substitute(list(beta) == "") text(x.big,y.big,temp.alpha,cex=cex,adj=1) #this shows the symbol alpha to the left text(x.big,y.big,temp.beta, cex=cex,adj=c(1,1.5)) #below and to the left text(x.big,y.big,round(big[,"alpha"],digits),offset=1,adj=0,cex=cex) # text(x.big,y.big,round(big[,"beta"],digits),cex=cex,pos=1) #text(x.big,y.big,round(big[,"beta"],digits),cex=cex,pos=1) text(x.big,y.big,round(big[,"beta"],digits),cex=cex,adj=c(0,1.7)) #text(x.big,y.big, substitute(list(alpha) == list(a),list(a=round(big[,"alpha"],digits))),cex=cex) #text(x.big,y.big, substitute(list(beta) == list(b), list(b=round(big[,"beta"],digits))),cex=cex,pos=1) # temp.n <- NROW(big) # for(i in 1:temp.n) {text(x.big[i],y.big[i],substitute(list(alpha) == list(a),list(a=round(big[i,"alpha"],digits))),cex=cex) # text(x.big[i],y.big[i],substitute(list(beta) == list(a),list(a=round(big[i,"beta"],digits))),cex=cex,pos=1) } #do the geometric work just once segments = 51 angles <- c((0:segments) * 2 * pi/segments,NA) unit.circle <- cbind(cos(angles), sin(angles)) #this will break xrange = (xlim[2] - xlim[1]) yrange = (ylim[2] - ylim[1]) xs <- e.size * xrange #store the values for drawing ellipsex <- rep(x.big,each=(segments + 2)) + unit.circle[,1] * xs ellipsey <- rep(y.big,each=(segments + 2)) + unit.circle[,2] *xs lines(ellipsex,ellipsey) if(NROW(small)>0) { x.small <- small[,"centerx"] y.small <- small[,"centery"] text(x.small,y.small, rownames(small),cex=cex) nc <- NROW(small) ellipsex <- rep(x.small,each=(segments + 2)) + unit.circle[,1] *xs * .75 ellipsey <- rep(y.small,each=(segments + 2)) + unit.circle[,2] *xs * .75 lines(ellipsex,ellipsey) } } #down is a recursive function that draws the complete cluster structure "down" <- function(clusters,head,x,y,sign.clust=1,old.head = NULL,old.loc=NULL,digits,cex,limx,limy,min.size=1,e.size=.6,color.lines=TRUE,colors=c("black","blue"),cluster.names,rect.list,arrow.list,cluster.list,bottomup) { a.loc <- NULL shift <- 2 size <- clusters[head,"size"] cluster <- clusters[head,] if(is.null(old.loc)) {link <- NA} else {link <- old.head} #remember the cluster that spawned this cluster if(size > min.size) {c.loc <- dia.cluster(head+shift,y,cluster,link=link,digits=digits,cex=cex,e.size=e.size,cluster.names=cluster.names) cluster.list <- c(cluster.list,c.loc) } else {c.loc <- dia.cluster(head+2,y,cluster,link=link,digits=digits,cex=cex,e.size=e.size*.6,small=TRUE,cluster.names=cluster.names) cluster.list <- c(cluster.list,c.loc) } if(!is.null(old.loc)) { if(old.loc$top[2] < c.loc$top[2]) {labels <- round(clusters[c.loc$link,"r1"],digits) } else { labels <- round(clusters[c.loc$link,"r2"],digits)} sign.clust <- sign(labels) if(old.loc$left[1] < c.loc$right[1]) { if(old.loc$left[2] < c.loc$right[2]) { sign.clust <- sign(labels) if(bottomup) {a.loc <- dia.arrow(c.loc,old.loc,labels=labels,cex=cex,col=colors[((sign.clust < 0)+1)],lty=(sign.clust < 0)+1,draw=FALSE)} else { a.loc <- dia.arrow(old.loc,c.loc,labels=labels,cex=cex,col=colors[((sign.clust < 0)+1)],lty=(sign.clust < 0)+1,draw=FALSE)}} else { if(bottomup) { a.loc <- dia.arrow(c.loc,old.loc,labels=labels,cex=cex,col=colors[((sign.clust <0)+1)],lty=((sign.clust)<0)+1,draw=FALSE)} else { a.loc <- dia.arrow(old.loc,c.loc,labels=labels,cex=cex,col=colors[((sign.clust <0)+1)],lty=((sign.clust)<0)+1,draw=FALSE)}}} else { if(bottomup){a.loc <- dia.arrow(c.loc,old.loc,labels=labels,cex=cex,col=colors[((sign(labels)<0)+1)],lty=((sign(labels)<0)+1),draw=FALSE) } else { a.loc <- dia.arrow(old.loc,c.loc,labels=labels,cex=cex,col=colors[((sign(labels)<0)+1)],lty=((sign(labels)<0)+1),draw=FALSE)}}} size1 <- clusters[head,"size1"] size2 <- clusters[head,"size2"] arrow.list <- c(arrow.list,a.loc) if(size1==1) { v.loc <- dia.rect(0,y+.5,clusters[head,1],xlim=limx,ylim=limy,cex=cex,draw=FALSE) rect.list <- c(rect.list,v.loc,clusters[head,1]) #sign.clust <- sign.clust *sign(cluster["r1"]) sign.clust <- sign(cluster["r1"]) if(bottomup) {a.loc <- dia.arrow(v.loc,c.loc,round(cluster["r1"],digits),cex=cex,col=colors[((sign.clust)<0) +1],lty=((sign.clust) <0)+ 1,draw=FALSE)} else { a.loc <- dia.arrow(c.loc,v.loc,round(cluster["r1"],digits),cex=cex,col=colors[((sign.clust)<0) +1],lty=((sign.clust) <0)+ 1,draw=FALSE) } arrow.list <- c(arrow.list,a.loc) } else { head1 <- which(rownames(clusters)== clusters[head,1]) cluster <- clusters[head1,] #get ready to go down the tree y.shift <- clusters[head1,"size2"] v.loc <- down(clusters,head1,x,y+y.shift,sign.clust,old.head=head,old.loc = c.loc,min.size=min.size,e.size=e.size,digits=digits,cex=cex,limx=limx,limy=limy,colors=colors,cluster.names=cluster.names,rect.list=rect.list,arrow.list=arrow.list,cluster.list=cluster.list,bottomup=bottomup) rect.list <- v.loc$rect.list cluster.list <- v.loc$cluster.list arrow.list <- v.loc$arrow.list } if(size2==1) { v.loc <- dia.rect(0,y-.5,clusters[head,2],xlim=limx,ylim=limy,cex=cex,draw=FALSE) rect.list <- c(rect.list,v.loc,clusters[head,2]) sign.clust <- sign(clusters[head,"r2"]) #sign.clust <- sign(clusters[head,"r2"]) if(bottomup) {a.loc <- dia.arrow(v.loc,c.loc,labels = round(clusters[head,"r2"],digits),cex=cex,col=colors[((sign.clust)<0) +1],lty=((sign.clust)<0) + 1, draw=FALSE) } else { a.loc <- dia.arrow(c.loc,v.loc,labels = round(clusters[head,"r2"],digits),cex=cex,col=colors[((sign.clust)<0) +1],lty=((sign.clust)<0) + 1, draw=FALSE)} arrow.list <- c(arrow.list,a.loc) } else { old.head <- head head <- which(rownames(clusters)== clusters[head,2]) cluster <- clusters[head,] y.shift <- clusters[head,"size1"] v.loc <- down(clusters,head,x,y-y.shift,sign.clust,old.head=old.head,old.loc = c.loc,min.size=min.size,e.size=e.size,digits=digits,cex=cex,limx=limx,limy=limy,colors=colors,cluster.names=cluster.names,rect.list=rect.list,arrow.list=arrow.list,cluster.list=cluster.list,bottomup=bottomup) rect.list <- v.loc$rect.list cluster.list <- v.loc$cluster.list arrow.list <- v.loc$arrow.list } invisible(list(rect.list=rect.list,arrow.list=arrow.list,cluster.list=cluster.list)) } psych/R/ICLUST.cluster.R0000644000176200001440000003173612260564364014431 0ustar liggesusersICLUST.cluster <- function (r.mat,ICLUST.options,smc.items) {#should allow for raw data, correlation or covariances #options: alpha =1 (minimum alpha) 2 (average alpha) 3 (maximum alpha) # beta =1 (minimum beta) 2 (average beta) 3 (maximum beta) # correct for reliability # reverse score items if negative correlations # stop clustering if beta for new clusters < beta.min # output =1 (short) 2 (show steps) 3 show rejects as we go # #initialize various arrays and get ready for the first pass output <- ICLUST.options$output num.var <- nrow(r.mat) keep.clustering <- TRUE #used to determine when we are finished clustering results <- data.frame(matrix(rep(0,18*(num.var-1)),ncol=18)) #create the data frame for the results #results <- matrix(rep(0,18*(num.var-1)),ncol=18) #use a matrix for speed but we can not address it by name names(results) <- c("Item/Cluster", "Item/Clust","similarity","correlation","alpha1","alpha2", "beta1","beta2","size1","size2","rbar1","rbar2","r1","r2","alpha","beta","rbar","size") rownames(results) <- paste("C",1:(num.var-1),sep="") digits <- ICLUST.options$digits clusters <- diag(1,nrow =nrow(r.mat)) #original cluster structure is 1 item clusters if(is.null(rownames(r.mat))) {rownames(r.mat) <- paste("V",1:num.var,sep="") } rownames(clusters) <- rownames(r.mat) colnames(clusters) <- paste("V",1:num.var,sep="") diag(r.mat) <- 0 row.range <- apply(r.mat,1,range,na.rm=TRUE) item.max<- pmax(abs(row.range[1,]),abs(row.range[2,])) #find the largest absolute similarity diag(r.mat) <- 1 count=1 #master loop while (keep.clustering) { #loop until we figure out we should stop #find similiarities #we will do most of the work on a copy of the r.mat #cluster.stats <- cluster.cor(clusters,r.mat,FALSE,SMC=ICLUST.options$SMC) #deleted 30/12/13 cluster.stats <- cluster.cor(clusters,r.mat,FALSE,SMC=ICLUST.options$SMC,item.smc=smc.items) sim.mat <- cluster.stats$cor #the correlation matrix diag(sim.mat) <- 0 #we don't want 1's on the diagonal to mess up the maximum #two ways to estimate reliability -- for 1 item clusters, max correlation, for >1, alpha #this use of initial max should be an option if (ICLUST.options$correct) { #find the largest and smallest similarities for each variable row.range <- apply(sim.mat,1,range,na.rm=TRUE) row.max <- pmax(abs(row.range[1,]),abs(row.range[2,])) #find the largest absolute similarity } else {row.max <- rep(1, nrow(sim.mat)) } #don't correct for largest similarity item.rel <- cluster.stats$alpha for (i in 1: length(item.rel)) { if (cluster.stats$size[i]<2) { item.rel[i] <- row.max[i] #figure out item betas here? }} if(output>3) print(sim.mat,digits=digits) #this is the corrected for maximum r similarities if (ICLUST.options$correct) { sq.max <- diag(1/sqrt(item.rel)) #used to correct for reliabilities sim <- sq.max %*% sim.mat %*% sq.max #this corrects for reliabilities but messes up the correlations of two item clusters with items } else {sim <- sim.mat} diag(sim) <- NA #we need to not consider the diagonal when looking for maxima #find the most similar pair and apply tests if we should combine test.alpha <- FALSE test.beta <- FALSE while(!(test.alpha&test.beta)){ max.cell <- which.max(sim) #global maximum if (length(max.cell) < 1) { keep.clustering <- FALSE break} #there are no non-NA values left sign.max <- 1 if ( ICLUST.options$reverse ) { #normal case is to reflect if necessary min.cell <- which.min(sim) #location of global minimum if (sim[max.cell] < abs(sim[min.cell] )) { sign.max <- -1 max.cell <- min.cell } if (sim[max.cell] < 0.0) {sign.max <- -1 }} #this is a weird case where all the similarities are negative -- happens towards the end of clustering max.col <- trunc(max.cell/nrow(sim))+1 #is in which row and column? max.row <- max.cell - (max.col-1)*nrow(sim) #need to fix the case of first column if (max.row < 1) {max.row <- nrow(sim) max.col <- max.col-1 } size1 <- cluster.stats$size[max.row] if(size1 < 2) {V1 <- 1 beta1 <- item.rel[max.row] alpha1 <- item.rel[max.row] rbar1 <- item.rel[max.row] } else { rbar1 <- results[cluster.names[max.row],"rbar"] beta1 <- results[cluster.names[max.row],"beta"] alpha1 <- results[cluster.names[max.row],"alpha"] V1 <- size1 + size1*(size1-1) * rbar1 } size2 <- cluster.stats$size[max.col] if(size2 < 2) {V2 <- 1 beta2 <- item.rel[max.col] alpha2 <- item.rel[max.col] rbar2 <- item.rel[max.col] } else { rbar2 <- results[cluster.names[max.col],"rbar"] beta2 <- results[cluster.names[max.col],"beta"] alpha2 <- results[cluster.names[max.col],"alpha"] V2 <- size2 + size2 * (size2-1) * rbar2} Cov12 <- sign.max * sim.mat[max.cell] * sqrt(V1*V2) #this flips the sign of the correlation for negative correlations r12 <- Cov12/(size1*size2) #average between cluster r V12 <- V1 + V2 + 2 * Cov12 #the variance of the new cluster size12 <- size1 + size2 V12c <- (V12 - size12)*(size12/(size12-1)) #true variance (using the average r on the diagonal) rbar <- V12c/(size12^2) alpha <- V12c/V12 #combine these two rows if the various criterion are passed #beta.weighted <- size12^2 * sign.max *r12/V12 #this was added June, 2009 but can produce negative betas beta.weighted <- size12^2 *r12/V12 #corrected July 28, 2009 beta.unweighted <- 2* sign.max*sim.mat[max.cell]/(1+sign.max* sim.mat[max.cell]) if(ICLUST.options$weighted) {beta.combined <- beta.weighted} else {beta.combined <- beta.unweighted} #what is the correlation of this new cluster with the two subclusters? #this considers item overlap problems #There are actually two alternative solutions #a) (cor.gen=TRUE) finds the correlation due to a shared general factor #b) (cor.gen=FALSE) finds the correlation for the general + group but remove the item overlap problem #neither seems optimal, in that a) will correctly identify non-correlated clusters, but b) is less affected by small clusters. if(ICLUST.options$cor.gen) { c1 <- r12 * size1 * size1 + Cov12 #corrected covariance c2 <- sign.max*(r12 * size2 * size2 + Cov12) } else { c1 <- size1^2* rbar1 + Cov12 c2 <- sign.max*(size2^2 *rbar2 + Cov12) } if((size1 < 2) && (size2 < 2)) { #r2 should be flipped if necessary -- r2 is always flipped (if necessary) when forming clusters r1 <- sqrt(abs(rbar1)) #this corrects for reliability in a two item cluster r2 <- sign.max* r1 #flips the sign if two are negatively correlated -- in the case of two items } else { #this next part corrects for item overlap as well as reliability of the subcluster if (ICLUST.options$correct.cluster) { #correct is the default option if(TRUE) {r1 <- c1/sqrt((V1 - size1 +size1 * rbar1) * V12) if (size2 < 2) { r2 <- c2/sqrt(abs(rbar2)*V12)} else { # r2 <- sign.max * c2/sqrt((V2-size2 + size2 * rbar2)*V12c)} #changed yet again on 6/10/10 r2 <- c2/sqrt((V2-size2 + size2 * rbar2)*V12c)} } else { if(size1 < 2 ) { r1 <- c1/sqrt(abs(rbar1)*V12)} else { r1 <- c1/sqrt((V1-size1 + size1 * rbar1)*V12c) } #flip the smaller of the two clusters -- no, flip r2 if (size2 < 2) {r2 <- c2/sqrt(abs(rbar2)*V12)} else { r2 <- c2/sqrt((V2-size2 + size2*rbar2)*V12c)} # r2 <- c2/sqrt((V2-size2+size2*rbar2)*V12c) } } else {if(TRUE) {r1 <- c1/sqrt(V1*V12) #do not correct r2 <- sign.max* c2/sqrt(V2*V12) } else { r1 <-sign.max* c1/sqrt(V1*V12) } #flip the smaller of the two clusters - flip r2 r2 <- c2/sqrt(V2*V12) } } #test if we should combine these two clusters #first, does alpha increase? test.alpha <- TRUE if (ICLUST.options$alpha>0) { #should we apply the alpha test? if (ICLUST.options$alpha.size < min(size1,size2)) { switch(ICLUST.options$alpha, {if (alpha < min(alpha1,alpha2)) {if (output>2) {print( paste ('do not combine ', cluster.names[max.row],"with", cluster.names[max.col], 'new alpha =', alpha,'old alpha1 =', alpha1,"old alpha2 =",alpha2))} test.alpha <- FALSE }}, {if (alpha < mean(alpha1,alpha2)) {if (output>2) {print(paste ('do not combine ', cluster.names[max.row],"with", cluster.names[max.col],'new alpha =',alpha, 'old alpha1 =',alpha1,"old alpha2 =",alpha2))} test.alpha <- FALSE }}, {if (alpha < max(alpha1,alpha2)) {if (output>2) {print(paste ('do not combine ', cluster.names[max.row],"with", cluster.names[max.col],'new alpha =', alpha, 'old alpha1 =',alpha1,"old alpha2 =",alpha2))} test.alpha <- FALSE }}) #end switch } #end if options$alpha.size } #second, does beta increase ? test.beta <- TRUE if (ICLUST.options$beta>0) { #should we apply the beta test? if (ICLUST.options$beta.size < min(size1,size2)) { switch(ICLUST.options$beta, {if (beta.combined < min(beta1,beta2)) {if (output>2) {print( paste ('do not combine ', cluster.names[max.row],"with", cluster.names[max.col],'new beta =', round (beta.combined,digits),'old beta1 =',round( beta1,digits),"old beta2 =",round(beta2,digits)))} test.beta <- FALSE }}, {if (beta.combined < mean(beta1,beta2)) {if (output>2) {print(paste ('do not combine ', cluster.names[max.row],"with", cluster.names[max.col],'new beta =', round (beta.combined,digits), 'old beta1 =',round( beta1,digits),"old beta2 =",round(beta2,digits)))} test.beta <- FALSE }}, {if (beta.combined < max(beta1,beta2)) {if (output>2) {print(paste ('do not combine ', cluster.names[max.row],"with", cluster.names[max.col],'new beta =', round (beta.combined,digits), 'old beta1 =',round( beta1,digits),"old beta2 =",round(beta2,digits)))} test.beta <- FALSE }}) #end switch } #end if options$beta.size } if(test.beta & test.alpha) { break} else { #we have failed the combining criteria if((ICLUST.options$n.clus > 0) & ((num.var - count ) >= ICLUST.options$n.clus) ) {warning ("Clusters formed as requested do not meet the alpha and beta criteria. Perhaps you should rethink the number of cluster settings.") break } else { if (beta.combined < ICLUST.options$beta.min) { keep.clustering <- FALSE #the most similiar pair is not very similar, we should quit break} else { sim[max.row,max.col] <- NA sim[max.col,max.row] <- NA }} } #end of test.beta & test.alpha } #end of while test.alpha & test.beta.loop #combine and summarize if (keep.clustering) { # we have passed the alpha and beta tests, now combine these two variables clusters[,max.row] <- clusters[,max.row] + sign.max * clusters[,max.col] cluster.names <- colnames(clusters) #summarize the results results[count,1] <- cluster.names[max.row] results[count,2] <- cluster.names[max.col] results[count,"similarity"] <- sim[max.cell] results[count,"correlation"] <- sim.mat[max.cell] results[count,"alpha1"] <- item.rel[max.row] results[count,"alpha2"] <- item.rel[max.col] size1 <- cluster.stats$size[max.row] size2 <- cluster.stats$size[max.col] results[count,"size1"] <- size1 results[count,"size2"] <- size2 results[count,"beta1"] <- beta1 results[count,"beta2"] <- beta2 results[count,"rbar1"] <- rbar1 results[count,"rbar2"] <- rbar2 results[count,"r1"] <- r1 results[count,"r2"] <- r2 results[count,"beta"] <- beta.combined results[count,'alpha'] <- alpha results[count,'rbar'] <- rbar results[count,"size"] <- size12 #update cluster.names[max.row] <- paste("C",count,sep="") colnames(clusters) <- cluster.names clusters <- clusters[,-max.col] cluster.names <- colnames(clusters) #row.max <- row.max[-max.col] } #end of combine section if(output > 1) print(results[count,],digits=digits) count=count+1 if ((num.var - count) < ICLUST.options$n.clus) {keep.clustering <- FALSE} if(num.var - count < 1) {keep.clustering <- FALSE} #only one cluster left } #end of keep clustering loop #make clusters in the direction of the majority of the items #direct <- -(colSums(clusters) < 0 ) #clusters <- t(diag(direct) %*% t(clusters)) #colnames(clusters) <- cluster.names ICLUST.cluster <- list(results=results,clusters=clusters,number <- num.var - count) } # end ICLUST.cluster #modified June 12, 2008 to calculate the item-cluster correlation for cluster of size 2 #modified June 14, 2009 to find weighted or unweighted beta #unweighted had been the default option before but it would seem that weighted makes more sense #modified June 5, 2010 to correct the graphic tree pathspsych/R/bigCor.R0000644000176200001440000000523014375544202013137 0ustar liggesusers#Function to find very big correlation matrices by stitching together smaller ones #see the help file for various timings bigCor <- function(x,size=NULL,use="pairwise",cor="pearson"){ nvar <- NCOL(x) if (is.null(size)) size <- 20 #added round 6/21/22 switched to 20 2/22/23 n.steps <- ceiling(nvar/size) #first makes sure the data are all numeric x <- char2numeric(x) #added 2/21/23 small.r <- matrix(NA,size,size) short <- function(i) { res <- list() loweri <- (i-1) * size + 1 upperi <- min(i * size,nvar) lowerj <- min((i-1) * size + 1,nvar) upperj <- min(i * size,nvar) #small.r <- cor(x[,loweri:upperi],use=use) for (j in i:n.steps) { lowerj <- min((j-1) * size + 1,nvar) upperj <- min(j * size,nvar) if(cor=="pearson") { small.r <- cor(x[,loweri:upperi],x[lowerj:upperj],use=use)} else { small.r <- t(polychoric(x[loweri:upperi],x[lowerj:upperj])$rho)} #fixed for polychoric res[[j]] <- t(small.r) } return(res) } result <- mcmapply(short,c(1:n.steps)) #now, stitch them together R <- matrix(NA,nvar,nvar) k <- 1 for(i in 1:NROW(result)) { nx <- dim(result[[i,1]])[1] #temp <- result[[i]] temp <- unlist(result[i,]) R[k:(k+ nx-1),1:(k+ nx-1) ] <- temp R[1:(k+nx -1),k:(k+ nx-1)] <- t( R[k:(k+ nx-1),1:(k+ nx-1) ]) k <- k +nx } colnames(R) <- rownames(R) <- colnames(x) return(R) } #result <- mapply(short,c(1:n.iter),MoreArgs=list(x,n.obs=n.obs,criteria=criteria,cut=cut,n.item=n.item,impute=impute,digits=digits,dictionary=dictionary,frac=frac,min.item=min.item,max.item=max.item)) #result <- mcmapply(short,c(1:n.iter),MoreArgs=list(x,n.obs=n.obs,criteria=criteria,cut=cut,n.item=n.item,impute=impute,digits=digits,dictionary=dictionary,frac=frac,min.item=min.item,max.item=max.item)) #options command: options("mc.cores"=4) will set the number of cores to 4. pairwiseCountBig <- function(x,size=NULL){ nvar <- NCOL(x) if (is.null(size)) size <- nvar/4 n.steps <- ceiling(nvar/size) small.r <- matrix(NA,size,size) short <- function(i) { res <- list() loweri <- (i-1) * size + 1 upperi <- min(i * size,nvar) lowerj <- min((i-1) * size + 1,nvar) upperj <- min(i * size,nvar) for (j in i:n.steps) { lowerj <- min((j-1) * size + 1,nvar) upperj <- min(j * size,nvar) small.r <- pairwiseCount(x[,loweri:upperi],x[lowerj:upperj]) res[[j]] <- t(small.r) } return(res) } result <- mcmapply(short,c(1:n.steps)) #now, stitch them together R <- matrix(NA,nvar,nvar) k <- 1 for(i in 1:NROW(result)) { nx <- dim(result[[i,1]])[1] temp <- unlist(result[i,]) R[k:(k+ nx-1),1:(k+ nx-1) ] <- temp R[1:(k+nx -1),k:(k+ nx-1)] <- t( R[k:(k+ nx-1),1:(k+ nx-1) ]) k <- k +nx } colnames(R) <- rownames(R) <- colnames(x) return(R) } psych/R/sdt.R0000644000176200001440000000507211515347322012525 0ustar liggesusers "sdt" <- function(x) { # if(!is.matrix(x)) {stop("x must be a matrix")} stopifnot(prod(dim(x)) == 4 || length(x) == 4) if (is.vector(x)) { x <- matrix(x, 2)} a <- x[1,1] b <- x[1,2] c <- x[2,1] d <- x[2,2] colnames(x) <- c("signal","noise") rownames(x) <- c("Signal","Noise") T <- sum(x) signals <- a+b H <- a/signals noise <- c+d F <- c/noise dprime <- qnorm(H) - qnorm(F) beta <- exp(((qnorm(F)^2 - qnorm(H))^2/2)) Beta <- dnorm(qnorm(H))/dnorm(qnorm(F)) Aprime <- .5 + sign((H-F) *((H-F)^2 + abs(H-F))/(4*max(H,F)-4 * H*F)) aprime <- .5 +(H-F)*(1-H-F)/(4*H*(1-F)) C <- -(qnorm(H)+qnorm(F))/2 Bprime <- sign(H-F) * ((H*(1-H) - F*(1-F))/((H*(1-H) + F*(1-F)) )) sdt <- list(x=x,dprime=dprime,Beta=Beta,lnBeta=log(Beta),beta=beta,lnbeta =log(beta),Aprime=Aprime,C=C,Bprime=Bprime,aprime=aprime) return(sdt) } beta.w <- function(hit,fa) { zhr <- qnorm(hit) zfar <- qnorm(fa) exp(-zhr*zhr/2+zfar*zfar/2) } aprime <-function(hit,fa) { a<-1/2+((hit-fa)*(1+hit-fa) / (4*hit*(1-fa))) b<-1/2-((fa-hit)*(1+fa-hit) / (4*fa*(1-hit))) a[fa>hit] <-b[fa>hit] a[fa==hit]<-.5 a } test.sdt <- function(n) { hits <- 1:n misses <- (n-1):0 fa <- sample(n,n,replace=TRUE) # fa <- n/4 cr <- n-fa test.sdt <- data.frame(hits,misses,fa,cr) } "sdt" <- function(x) { if(is.null(dim(x))) {# the case of a single problem if (length(x) == 2) {x <- matrix(x,nrow=1) H <- x[1] F <- x[2] colnames(x) <- c("Hit Rate","False Alarmes")} else { x <- matrix(x,nrow=1) # H <- x[,1]/(x[,1] + x[,2]) F <- x[,3]/(x[,3] + x[,4]) x[(x[,2] < 1)] <- .5 H <- x[,1]/(x[,1] + x[,2]) if (x[,3] < 1) {F <- x[,3]/(x[,3] + x[,4] + .5)} colnames(x) <- c("signal","noise","Signal","Noise")}} else { if(dim(x)[2] ==2) { H <- x[,1] F <- x[,2] colnames(x) <- c("Hit Rate","False Alarmes")} else { #the vector case x[(x[,2] < 1),2] <- .5 x[(x[,3] < 1),3] <- .5 H <- x[,1]/(x[,1] + x[,2]) F <- x[,3]/(x[,3] + x[,4]) colnames(x) <- c("signal","noise","Signal","Noise")} } dprime <- qnorm(H) - qnorm(F) beta <- dnorm(qnorm(H))/dnorm(qnorm(F)) #definitional cprime <- -.5*(qnorm(H) + qnorm(F))/dprime #macmillan beta.w <- exp(-qnorm(H)^2/2+qnorm(F)/2 ) #pallier = beta Aprime <- .5 +(H-F)*(1+H-F)/(4*H*(1-F)) #Grier Bprime <- sign(H-F) * ((H*(1-H) - F*(1-F))/((H*(1-H) + F*(1-F)) )) bprime <- ((1-H)*(1-F)-H*F)/((1-H)*(1-F)+H*F) sdt <- data.frame(x=x,dprime=dprime,beta=beta,cprime=cprime,lnBeta=log(beta),Aprime=Aprime,Bprime=Bprime,brime=bprime,beta.w = beta.w) return(sdt) }psych/R/auc.r0000644000176200001440000001045614444460145012550 0ustar liggesusers#Written December 7, 2017 to show various decision processes. "AUC" <- function(t=NULL,BR=NULL,SR=NULL,Phi=NULL,VP=NULL,labels=NULL,plot="b",zero=TRUE,correct=.5,col=c("blue","red")) { PLOT <- TRUE alpha <- .5 col <- adjustcolor(col,alpha.f =alpha) if(!is.null(t) & is.null(BR)) {stopifnot(prod(dim(t)) ==4 || length(t) ==4) if(is.vector(t)) t <- matrix(t,2, byrow=TRUE) } else {if(!is.null(t)) { phi <- SR SR <- BR BR <- t } if(!is.null(Phi)) { VP <- BR * SR + Phi *sqrt(BR * (1-BR) * SR *(1-SR))} t <- matrix(c(VP,SR -VP, BR-VP, 1- SR - BR + VP),2,2) } if((sum(t) > 1) && (min(t) == 0) && (correct > 0)) { message(" A cell entry of 0 was replaced with correct = ", correct, ". Check your data!") t[t==0] <- correct} #correction for continuity phi <- phi(t) tetra <- tetrachoric(t) colnames(t) <- c("Predicted.Pos","Predicted.Neg") rownames(t) <- c("True.Pos","True.Neg") observed <- t p1 <- t(t ( t)/rowSums(t)) t <- t / sum(t) rs <- rowSums(t) cs <- colSums(t) p <- (t)/rs #this has converted everything to conditional probabilities Sensitivity <- p[1,1] #Sensitivity VP Specificity <- p[2,2] #VN FP <- 1 - Specificity #really? Yes, because we are talking conditional probabilities PD <- rowSums(t) p1 <- p FN <- 1- Specificity #really? Yes because it is conditional Accuracy <- Sensitivity *PD[1] + Specificity * (1-PD[1]) phi <- phi(t) q <- qnorm(p1) #based upon the conditional probabilities #spell these out for readability zVP <- q[1,1] zFP <- q[2,1] criterion <- -zFP zVN <- q[2,2] d.prime <- zVP - zFP beta <- pnorm(d.prime - criterion)*rs[1]/(pnorm(criterion)*rs[2]) xmax <- max(4,d.prime+3) if(is.infinite(xmax)) {PLOT <- FALSE x <- seq(-3,4) } else {x <- seq(-3,xmax,.1) } y2 <- dnorm(x , -(!zero)*d.prime) * rs[2] #noise y <- dnorm(x, zero * d.prime ) * rs[1] #signal + noise if(PLOT){if(plot == "b") {op <- par(mfrow=c(2,1))} if((plot =="b") | (plot =="a")) { plot(Sensitivity ~ FP,xlim=c(0,1),ylim=c(0,1),ylab="Valid Positives",xlab="False Positives",main="Valid Positives as function of False Positives") segments(0,0,FP,Sensitivity) segments(FP,Sensitivity,1,1) segments(0,0,1,1)} } fpx <- pnorm(x-qnorm(Specificity)) vpx <- pnorm(x+ qnorm(Sensitivity)) fpx.diff <- diff(fpx) lower.sum <- sum(fpx.diff * vpx[-1]) upper.sum <- sum(fpx.diff * vpx[-length(vpx)]) auc <- (lower.sum + upper.sum)/2 if((plot =="b") | (plot =="a") && PLOT) { points(vpx ~ fpx,typ="l",lty="dashed") } if((plot =="b") | (plot =="d") &&PLOT) { plot(y ~ x, ylim=c(0,.4),ylab="Probability of observation",main="Decision Theory",type="l") points(y2 ~ x,lty="dashed",typ="l") # curve(dnorm(x,q[1,1]) * cs[1],-3,3,ylim=c(0,.4),ylab="Probability of observation",main="Decision Theory") # curve(dnorm(x,-q[2,2]) * cs[2], add=TRUE,lty="dashed") #segments((!zero)*(q[2,2]),0,(!zero)*(q[2,2]),dnorm(q[2,2])*cs[2]) x1 <- x[x >= (criterion -(!zero) *d.prime)] x1r <-rev(x1) y1 <- y[x >= (criterion - (!zero) *d.prime)] y2c <- y2 [x >= (criterion -(!zero) *d.prime)] y1r <- rep(0,length(y1)) polygon(c(x1,x1r),c(y1,y1r),col= col[1]) polygon(c(x1,x1r),c(y2c,y1r),col= col[2]) } if(plot =="b" && PLOT) par(op) result<- list(observed=observed,probabilities=t,conditional=p1,q=q,Accuracy = Accuracy,Sensitivity=Sensitivity,Specificity=Specificity,AUC = auc,d.prime = d.prime,beta = beta, criterion=criterion,phi=phi,tetrachoric=tetra$rho) class(result) <- c("psych","auc") return(result) } print_psych.auc <- function(x,digits=2) { cat('Decision Theory and Area under the Curve\n') cat('\nThe original data implied the following 2 x 2 table\n') print(x$probabilities,digits=digits) cat('\nConditional probabilities of \n') print(x$conditional,digits=digits) cat('\nAccuracy = ',round(x$Accuracy,digits=digits),' Sensitivity = ',round(x$Sensitivity,digits=digits), ' Specificity = ',round(x$Specificity,digits=digits), '\nwith Area Under the Curve = ', round(x$AUC,digits=digits) ) cat('\nd.prime = ',round(x$d.prime,digits=digits), ' Criterion = ',round(x$criterion,digits=digits), ' Beta = ', round(x$beta,digits=digits)) cat('\nObserved Phi correlation = ',round(x$phi,digits=digits), '\n Inferred latent (tetrachoric) correlation = ',round(x$tetrachoric,digits=digits)) } psych/R/biplot.psych.R0000644000176200001440000001501014345425351014345 0ustar liggesusers#rewritten Sept 18,2013 to allow much more controll over the points in a two dimensional biplot #the three or more dimensional case remains the same as before #The code for the two dimensional case is adapted (heavily) from the stats:::biplot function #corrected April 9, 2015 to allow multiple biplots in the same window #Seriously modified June 25, 2016 to allow better control for labels, as well as just generally cleaner code "biplot.psych" <- function(x, labels=NULL,cex=c(.75,1),main="Biplot from fa",hist.col="cyan",xlim.s=c(-3,3),ylim.s=c(-3,3),xlim.f=c(-1,1),ylim.f=c(-1,1),maxpoints=100,adjust=1.2,col,pos, arrow.len = 0.1,pch=16,choose=NULL,cuts=1,cutl=.0,group=NULL,smoother = FALSE,vars=TRUE,...) { if(is.null(x$scores)) stop("I am sorry, but Biplot requires factor/component scores. \nYou need to run fa/pca from the raw data") op <- par() old.par <- par(no.readonly = TRUE) on.exit(par(old.par)) MAR <- par("mar") MFROW <- par("mfrow") title <- main main <- NULL #fa.poly nests the fa and scores within a list if(is.list(x$scores)) x$scores <- x$scores$scores #the case of fa.poly output if(is.list(x$fa)) x$loadings <- x$fa$loadings #once again, the case of fa.poly if(!is.null(choose)) { #plot just a pair x$scores <- x$scores[,choose,drop=FALSE] x$loadings <- x$loadings[,choose,drop=FALSE] } colnames(x$scores) <- colnames(x$loadings) if((missing(group)) || (is.null(group))) group <- rep(1,nrow(x$scores)) #if(missing(pos)) pos <- NULL #if(is.null(labels)) {if(nrow(x$scores) > maxpoints ) {labels = rep(".",dim(x$scores)[1] )} else {labels = rep("o",dim(x$scores)[1] )}} n.dims <- dim(x$loadings)[2] #this is taken directly from biplot # if (missing(col)) { # col <- par("col") # if (!is.numeric(col)) # col <- match(col, palette(), nomatch = 1L) # col <- c(col, col + 1L) # } # else if (length(col) == 1L) # col <- c(col, col) if(missing(col)) {col <- c("black","red","blue","#FF0000FF", "#00FF00FF", "#00FFFFFF", "#0000FFFF" ,"#FF00FFFF")} #rainbow + black, red ch.col <- col #here is where we add some plotting controls that are missing from stats:::biplot if (n.dims == 2) { #just do a one panel graph op <- par(pty = "s") #we no longer resize the margins, but rather adjust where the title goes #if (!is.null(main)) op1 <- c(op, par("mar" = MAR + c(0, 0, 1, 0))) #give room for the title -- # if (!is.null(main)) par("mar" = MAR + c(0, 0, 1, 0)) #give room for the title - #plotone does the work plotone(x$scores,x$loading,labels=labels,main=main,xlim.s=xlim.s,ylim.s=ylim.s,xlim.f=xlim.f,ylim.f=ylim.f,maxpoints=maxpoints,adjust=adjust,col=col,pos=pos, arrow.len = arrow.len,pch=pch,choose=choose,cuts=cuts,cutl=cutl,group=group,ch.col=ch.col,smoother=smoother,vars=vars,... ) par(new = TRUE) #this sets it up so that we can plot on top of a plot dev.hold() on.exit(dev.flush(), add = FALSE) } else { #the case of 3 or more factors -- we do the equivalent of a pairs plot op1 <- par(mfrow=c(n.dims,n.dims), mar=c(2,3,3,2)) if(nrow(x$scores) > maxpoints) {labels <- rep(".",nrow(x$scores))} else {labels <- rep("o",nrow(x$scores))} for (i in 1:n.dims) { for (j in 1:n.dims){ if(i==j) {h <- hist(x$scores[,i],freq=FALSE, main=colnames(x$loadings)[i],xlab="",ylab="",col=hist.col) breaks <- h$breaks; nB <- length(breaks) tryd <- try( d <- density(x$scores[,i],na.rm=TRUE,bw="nrd",adjust=adjust),silent=TRUE) if(!inherits(tryd,"try-error")) { lines(d)} } else { # biplot(x$scores[,c(j,i)],x$loadings[,c(j,i)],xlabs=labels,xlab="",ylab="",cex=cex,xlim=xlim.s,ylim=ylim.s,pch=pch,...)} plotone(x$scores[,c(j,i)],x$loadings[,c(j,i)],main=NULL,xlim.s=xlim.s,ylim.s=ylim.s,xlim.f=xlim.f,ylim.f=ylim.f,maxpoints=maxpoints,adjust=adjust,col=col,pos=pos, arrow.len = arrow.len,pch=pch,choose=choose,cuts=cuts,cutl=cutl,group=group,ch.col=ch.col,smoother=smoother,vars=vars,... )} #work on this way of doing it } } } #We do not want to reset the margins back to their prior values, because then we lose the ability to add lines to the figure # par(old.par) # par("mar"=MAR) #this puts the margins back to what they were when we started. Important for multiple biplots # par("mfrow"=MFROW) title(title,line=2) } #End of biplot.psych plotone <- function( scores,loadings,labels=NULL,cex=c(.75,1),main=main,hist.col="cyan",xlim.s=c(-3,3),ylim.s=c(-3,3),xlim.f=c(-1,1),ylim.f=c(-1,1),maxpoints=100,adjust=1.2,col,pos, arrow.len = 0.1,pch=16,choose=NULL,cuts=1,cutl=.0,group=NULL,ch.col=c("black","blue"),smoother=FALSE, vars=TRUE,...) { #There are three different conditions #1 just show the (unlabeled) points #2 don't show the points, but put in labels at their locations #3 show the points with labels near them according to pos choice <- "one" if(!is.null(labels)) { if(missing(pos)) {choice <- "two" } else {choice <- "three"} } if(smoother) choice="smoother" switch(choice, #we plot the scores here for scores > abs(cut) on x and y "one" = {plot(scores, xlim = xlim.s, ylim = ylim.s, cex=cex[1],main=main,pch=pch[group],bg=ch.col[group],col=col[group],...) } , "two" = {plot(scores,typ='n', xlim = xlim.s, ylim = ylim.s,cex=cex[1],main=main,pch=pch[group],bg=ch.col[group],col=col[group],...) labels[sqrt((abs(scores[,1])^2 + abs(scores[,2])^2 ) ) < cuts] <- NA text(scores,labels=labels,col=ch.col[group],pos=NULL,cex=cex[1]) }, "three" = {plot(scores, xlim = xlim.s, ylim = ylim.s,cex=cex[1],main=main,pch=pch[group],bg=ch.col[group],col=col[group],...) labels[sqrt((abs(scores[,1])^2 + abs(scores[,2])^2)) < cuts] <- NA text(scores,labels=labels,pos=pos,cex=cex[1],col=ch.col[group])}, "smoother" = {smoothScatter(scores, nrpoints=0) } ) par(new = TRUE) #this sets it up so that we can plot on top of a plot dev.hold() on.exit(dev.flush(), add = FALSE) plot(loadings, axes = FALSE, type = "n", xlim = xlim.f, ylim = ylim.f, xlab = "", ylab = "", col = col[1L], ...) labels <- rownames(loadings) labels[sqrt(loadings[,1]^2 + loadings[,2]^2) < cutl] <- NA text(loadings, labels = labels, cex = cex[2L], col = col[2L], ...) if(vars) {arrows(0, 0, loadings[, 1L] * 0.8, loadings[, 2L] * 0.8, col = col[2L], length = arrow.len)} else { arrows(0, 0, scores[, 1L] * xlim.f/xlim.s, scores[, 2L] * ylim.f/ylim.s, col = col[2L], length = arrow.len)} axis(3, col = col[2L], ...) axis(4, col = col[2L], ...) box(col = col[1L]) } psych/R/dummy.code.R0000644000176200001440000000131114364040203013760 0ustar liggesusers"dummy.code" <- function(x,group=NULL,na.rm=TRUE,top=NULL,min=NULL) { t <- table(x) t <- sort(t,decreasing=TRUE) if(!is.null(min)) {top <- sum(t >= min)} if(is.null(top)) top <- length(t) t <- t[1:top] lt <- length(t) n.obs <- length(x) if(is.null(group)) {new <- matrix(0,nrow=n.obs,ncol=lt) #the case of no grouping information if(na.rm) {new[is.na(x),] <- NA } #added 10/20/17 xlev <- factor(x,levels=names(t)) for (i in 1:n.obs) { new[i,xlev[i]] <- 1} colnames(new) <- names(t) } else {new <- rep(0,n.obs) #the alternative is to combine categories xlev <- as.factor(x) if(na.rm) {new[is.na(x)] <- NA } for (i in 1:n.obs) { new[i] <- xlev[i] %in% group} } return(new) } psych/R/superMatrix.R0000644000176200001440000000531514162200325014246 0ustar liggesusers "superMatrix" <- function(x,y=NULL) {if(is.list(x)) { if(is.null(y)) { y <- x[-1]} else { y <- list(x[-1],y) } x <- x[[1]] } if(is.list(y)) { if (length(y) > 1) { x <- superMatrix(x,y[[1]]) xy <- superMatrix(x,y[-1]) } else {y <- y[[1]] xy <- superMatrix(x,y)} } else { if(is.vector(x)) {x <- matrix(x) colnames(x) <- "X" if(dim(x)[1] <2) {rownames(x) <- "X"} else {rownames(x) <- paste("Vx",1:dim(x)[1],sep="") } } else {if (is.null(colnames(x))) colnames(x) <- paste("X",1:dim(x)[2],sep="") if (is.null(rownames(x))) rownames(x) <- paste("Vx",1:dim(x)[1],sep="")} if(is.vector(y)) {y <- matrix(y) colnames(y) <- "Y" if(dim(y)[1]<2) {rownames(y) <- "Y"} else {rownames(y) <- paste("Vy",1:dim(y)[1],sep="") } } else {if (is.null(colnames(y))) colnames(y) <- paste("Y",1:dim(y)[2],sep="") if (is.null(rownames(y))) rownames(y) <- paste("Vy",1:dim(y)[1],sep="")} fillx <- rbind(x,matrix(0,ncol=dim(x)[2],nrow=dim(y)[1])) filly <- rbind(matrix(0,ncol=dim(y)[2],nrow=dim(x)[1]),y) xy <- cbind(fillx,filly) colnames(xy) <- c(colnames(x),colnames(y)) rownames(xy) <- c(rownames(x),rownames(y)) } return(xy)} #fixed June 21, 2009 to add rownames of matrices if necessary #modified June 8, 2012 to add list input option "super.matrix" <- function(x,y) { .Deprecated("super.matrix", msg = "super.matrix is deprecated. Please use the superMatrix function") xy <- superMatrix(x,y) return(xy)} #fixed June 21, 2009 to add rownames of matrices if necessary #modified June 8, 2012 to add list input option #a helper function to take the output of scoreOverlap #and form the matrix of (scores by item) x (scores by item) correlations #so that we can then do bestScales on the items related to the score #Created 12/23/21 "superCor" <- function(x,y=NULL,xy=NULL) { if(length(class(x)) > 1) { value <- class(x)[2] } if(value %in% c("score.items","overlap")) { scores <- x x <- scores$cor xy <- scores$item.cor} nx <- NCOL(x) ny <- NROW(xy) if(is.null(y)) {nc <- nx nr <- nx + ny} else {nc <- nx + NCOL(y) nr <- nc if(NCOL(y)!= NROW(xy)) stop("Rows of xy matrix do not match rows of y. Did you remember to specify select=FALSE when using scoreOverlap or scoreItem? ")} R <- matrix(NA,nr,nc) R[1:nx,1:nx] <- x R[(nx+1):nr,1:nx] <- xy if(!is.null(y)) {R[1:nx,(nx+1):nc] <- t(xy) R[(nx+1):nr,(nx+1):nc] <- y} colnames(R) <- c(colnames(x),colnames(y)) rownames(R ) <- c(colnames(x),rownames(xy)) return(R)} psych/R/irt.0p.R0000644000176200001440000000056711351165222013046 0ustar liggesusers"irt.0p" <- function(items) { possible <- dim(items)[2] raw <- rowMeans(items,na.rm=TRUE) ave <- raw valid <- rowSums(!is.na(items)) ave[(!is.na(ave))&(ave<.0001)] <- .5/(possible) ave[(!is.na(ave))&(ave > .9999)] <- (possible-.5)/possible theta <- -log((1/ave) -1) irt.0p <- matrix(c(raw,theta,valid),ncol=3) colnames(irt.0p ) <- c("raw","theta0","valid") return(irt.0p) }psych/R/rmsea.R0000644000176200001440000000231214444163334013037 0ustar liggesusersRMSEA <- function(chisq, dof,n.obs,alpha=.1){ tail <- alpha/2 if(!is.na(chisq)){ RMSEA <- sqrt(max(chisq/(dof* n.obs) - 1/(n.obs-1), 0)) max <- max(n.obs,chisq) +2* n.obs RMSEA.U <- 0 #in case we can not find it if(pchisq(df=dof,q=chisq) > tail){ RMSEA.U <-try ( sqrt(uniroot(function(x) {pchisq(df=dof,ncp=x,q=chisq)- tail},c(0,max))$root/(n.obs-1)/dof),silent=TRUE) if(inherits( RMSEA.U,"try-error")) {if(RMSEA <= 0 ) {RMSEA.U <- 0} else {message(" I could not find the RMSEA upper bound . Sorry about that") #if the fit is super good, then the chisq is too small to get an upper bound. Report it as 0. RMSEA.U <- NA}} } RMSEA.L <- 0 #in case we can not find it if(pchisq(df=dof,q=chisq) > (1-tail)) { RMSEA.L <- try( sqrt(uniroot(function(x) {pchisq(df=dof,ncp=x,q=chisq)-1 + tail},c(0,max))$root/(n.obs-1)/dof) ,silent=TRUE) if(inherits(RMSEA.L,"try-error")) { RMSEA.L <- NA} } else {RMSEA.L <- 0} # result <- list(RMSEA.L=RMSEA.L,RMSEA=RMSEA,RMSEA.U=RMSEA.U,alpha=alpha) class(result) <- c("psych","rmsea") return(result) } else {result <- RMSEA <- RMSEA.L <- RMSEA.U <- NULL} } # chisq <- objective * ((n.obs-1) -(2 * n.var + 5)/6 -(2*nfactors)/3)psych/R/sim.circ.R0000644000176200001440000000255411124030611013426 0ustar liggesusers"sim.circ" <- function (nvar = 72 ,nsub = 500, circum = TRUE, xloading =.6, yloading = .6, gloading=0, xbias=0, ybias = 0,categorical=FALSE, low=-3,high=3,truncate=FALSE,cutpoint=0) { avloading <- (xloading+yloading)/2 errorweight <- sqrt(1-(avloading^2 + gloading^2)) #squared errors and true score weights add to 1 g <- rnorm(nsub) truex <- rnorm(nsub)* xloading +xbias #generate normal true scores for x + xbias truey <- rnorm(nsub) * yloading + ybias #generate normal true scores for y + ybias if (circum) #make a vector of radians (the whole way around the circle) if circumplex {radia <- seq(0,2*pi,len=nvar+1) rad <- radia[which(radia<2*pi)] #get rid of the last one } else rad <- c(rep(0,nvar/4),rep(pi/2,nvar/4),rep(pi,nvar/4),rep(3*pi/2,nvar/4)) #simple structure #simple structure error<- matrix(rnorm(nsub*(nvar)),nsub) #create normal error scores #true score matrix for each item reflects structure in radians trueitem <- outer(truex, cos(rad)) + outer(truey,sin(rad)) item<- gloading * g + trueitem + errorweight*error #observed item = true score + error score if (categorical) { item = round(item) #round all items to nearest integer value item[(item<= low)] <- low item[(item>high) ] <- high } if (truncate) {item[item < cutpoint] <- 0 } return (item) } psych/R/omegaSem.R0000644000176200001440000001310613715252250013464 0ustar liggesusers#August 12, 2020 dropped the calls to the sem package. Just use lavaan for the processing "omegaSem" <- function(m,nfactors=3,fm="minres",key=NULL,flip=TRUE, digits=2,title="Omega",sl=TRUE,labels=NULL, plot=TRUE,n.obs=NA,rotate="oblimin",Phi = NULL,option="equal",lavaan=TRUE,...) { #m is a correlation matrix, or if not, the correlation matrix is found #nfactors is the number of factors to extract #key allows items to be reversed scored if desired #if Phi is not null, this implies that we have been given a factor matrix -- added May 30, 2010 if(lavaan) {if(!requireNamespace('lavaan')) stop("You must have the lavaan package installed to use omegaSem")} #else {if(!requireNamespace('sem')) stop("You must have the sem package installed to use omegaSem")} if (!sl) {warning("OmegaSem only works for Bifactor models, sl set to TRUE ") sl <- TRUE} cl <- match.call() om <- omega(m=m,nfactors=nfactors,fm=fm,key=key,flip=flip, digits=digits,title=title,sl=sl,labels=labels, plot=plot,n.obs=n.obs,rotate=rotate,Phi=Phi,option=option,...) #m is a correlation matrix, or if not, the correlation matrix is found #nfactors is the number of factors to extract #key allows items to be reversed scored if desired #if Phi is not null, this implies that we have been given a factor matrix -- added May 30, 2010 if(lavaan) {sem.model <- om$model$lavaan} else {sem.model <- om$model$sem} if(is.na(n.obs)) {n.obs <- om$gstats$n.obs} if(dim(m)[1] != dim(m)[2]) { n.obs <- dim(m)[1] m <- cor(m,use="pairwise")} else { m <- cov2cor(as.matrix(m)) #make sure it is a correlation matrix not a covariance or data matrix (if we change this, we will need to change the calculation for omega later) } nvar <- dim(m)[2] if(is.na(n.obs)) {message("Number of observations not specified. Arbitrarily set to 500") n.obs <- 500 } if(is.null(colnames(m))) { rownames(m) <- colnames(m) <- paste("V",1:nvar,sep="") } m.names <- colnames(m) if(lavaan) {if(!requireNamespace('lavaan')) stop("You must have the lavaan package installed to use omegaSem")} #else {if(!requireNamespace('sem')) stop("You must have the sem package installed to use omegaSem")} #if(!requireNamespace('sem')) {stop("You must have the sem package installed to use omegaSem") if(lavaan) {sem.om <- lavaan::cfa(sem.model,sample.cov=m,sample.nobs=n.obs,orthogonal=TRUE,std.lv=TRUE) } #else {sem.om <- sem::sem(sem.model,m, n.obs) } omega.efa <- omegaFromSem(sem.om,m,flip=flip) results <- list(omegaSem=om,omega.efa=omega.efa,sem=sem.om,Call=cl) class(results) <- c("psych", "omegaSem") return(results) } #modified Sept 1 to pass the number of observations to SEM #modified Jan 2, 2015 to call sem::sem which seems to be the preferred manner "omegaFromSem" <- function(fit,m=NULL,flip=TRUE,plot=TRUE) { # m is the correlation matrix # s is the sem solution from either sem or from lavaan -- I think I want to drop the sem option if(inherits(fit,"lavaan")) { #get the lavaan parameters fx <- fit@Model@GLIST$lambda m <- cov2cor(as.matrix(fit@SampleStats@cov[[1]])) Fit <- fit@Fit@test sem <- "lavaan" nvar <- nrow(fx) n.fact <- ncol(fx) g <- fx[,1] rn <- fit@Data@ov.names[[1]] cfa.loads <- fx} else { #get the sem parameters sem <- "sem" nvar <- dim(m)[1] n.fact <-dim(fit$A)[2]-nvar rn <- rownames(m) g <- fit$A[1:nvar,nvar+n.fact] Fit <- NULL #for now cfa.loads <- cbind(g,fit$A[1:nvar,(nvar+1):(nvar+n.fact-1)]) #this puts g first } if(flip) { flipper <- rep(1,nvar) flipper[g < 0] <- -1 signkey <- strtrim(flipper,1) signkey[signkey=="1"] <- "" rn <- paste(rn,signkey,sep="") flipper <- diag(flipper) m <- flipper %*% m %*% t(flipper) g <- flipper %*% g cfa.loads <- flipper %*% cfa.loads countneg <- colSums(cfa.loads < 0) for(i in 1:n.fact) { if(countneg[i]> 0 ) cfa.loads[,i] <- -cfa.loads[,i] } } w <- solve(m,cfa.loads) rownames(cfa.loads) <- rn rownames(w) <- rn if(NCOL(cfa.loads) >1 ) {colnames(cfa.loads) <- c("gs",paste0("F",1:(n.fact-1),"s*"))} else { warning("\nJust one factor defined, therefore, just omega_total is found") colnames(cfa.loads) <- "gs"} gR2 <- diag(t(w) %*% cfa.loads) Vt <- sum(m) omh.sem <- sum(g)^2/Vt h2 <- sum(rowSums(cfa.loads^2)) uniq <- tr(m) - h2 omt.sem <- (Vt - uniq)/Vt #find the subset omegas omg <- omgo <- omt<- rep(NA,n.fact) sub <- apply(cfa.loads,1,function(x) which.max(abs(x[2:(n.fact)]))) grs <- 0 for(group in( 1:n.fact)) { groupi <- which(sub==group) if(length(groupi) > 0) { Vgr <- sum(m[groupi,groupi]) gr <- sum(cfa.loads[groupi,(group+1)]) grs <- grs + gr^2 omg[group+1] <- gr^2/Vgr omgo[group+1] <- sum(cfa.loads[groupi,1])^2/Vgr omt[group+1] <- (gr^2+ sum(cfa.loads[groupi,1])^2)/Vgr } } omgo[1] <- sum(cfa.loads[,1])^2/sum(m) #omega h omg[1] <- grs/sum(m) #omega of subscales omt[1] <- omt.sem om.group <- data.frame(total=omt,general=omgo,group=omg) # rownames(om.group) <- colnames(gf$sl)[1:(nfactors+1)] class(cfa.loads) <- "loadings" results <- list(omega=omh.sem,omega.tot = omt.sem,cfa.loads=cfa.loads,gR2=gR2,omega.group=om.group,Fit=Fit,sem=sem) class(results) <- c("psych","omegaSem") if(plot) {if(n.fact > 1) { omega.diagram(results,sort=TRUE)} else {if(sem == 'lavaan') lavaan.diagram(fit,cut=0) } } return(results) } #added the ability to work with just 1 factor (following error report by Erich Studerus) August 12, 2020 psych/R/summary.psych.R0000744000176200001440000004770714526773076014607 0ustar liggesusers#Modified 1/10/14 to convert to switch "summary.psych" <- function(object,digits=2,items=FALSE,...) { #figure what we are trying to summarize #omega, ICLUST, score.clusters,cluster.cor #faBy, esem #if(!is.null(object$title)) { cat("\nSummary of an analysis of ",object$title)} #figure out which psych function called us if(length(class(object)) > 1) { mat.reg <- bassAck <- overlap <- scores <- none <- extend <- extension <- crossV <- NA #to let it compile in R4.0 obnames <- cs(principal,score.items,cluster.loadings,mat.regress, set.cor, lmCor, mat.reg, bassAck, bestScales,crossV,iclust,omega,omegaSem,omegaDirect,overlap, scores,testRetest, vss,cluster.cor, esem,fa,faBy,extend,extension,items,alpha,setCor,irt.fa,cohen.d,cohen.d.by,mediate,describeData,none) value <- inherits(object, obnames, which=TRUE) if (any(value > 1)) {value <- obnames[which(value >0)]} else {value <- "none"} if(value=="extend") value <- "extend" if(value=="extension") value <- "extend" if(value=="principal") value <- "fa" if(value=="score.items") value <- "scores" if(value=="cluster.loadings") value <- "cluster.cor" if(value=="mat.regress") value <- "mat.reg" if(value=="set.cor") value <- "lmCor" if(value=="mat.reg") value <- "lmCor" } else {value <- "none"} switch(value, bassAck = { cat("\nCall: ") print(object$Call) nf <- length(object$bass.ack)-1 for (f in 1:nf) { cat("\nFactor correlations\n ") print(round(object$bass.ack[[f]],digits=digits)) } }, bestScales = { cat("\nCall = ") print(object$Call) # print(object$first.result) # print(round(object$means,2)) if(!is.null(object$summary)) { print(object$summary,digits=digits)} else { df <- data.frame(correlation=object$r,n.items = object$n.items) cat("The items most correlated with the criteria yield r's of \n") print(round(df,digits=digits)) } # if(!is.null(object$optimal)) { # cat("\n Optimal number of items, derivation and cross validation\n") # print(object$optimal,digits=digits) } else { # df <- data.frame(correlation=object$r,n.items = object$n.items) # cat("The items most correlated with the criteria yield r's of \n") # print(round(df,digits=digits)) } }, crossV = {cat("Cross Validation\n") cat("Call:") print(object$Call) cat("\nValidities from raw items and from the correlation matrix\n") cat("Number of unique predictors used = ",object$nvars,"\n") if(!is.null(object$mean.fit)) {print(object$mean.fit,digits=digits)} else {print(object$crossV,digits=digits)} }, iclust = { cat("ICLUST (Item Cluster Analysis)") cat("Call: ") print(object$call) cat( object$title,"\n") cat("\nPurified Alpha:\n") print(object$purified$alpha,digits) cat("\n Guttman Lambda6* \n") print(object$G6,digits) cat("\nOriginal Beta:\n") print(object$beta,digits) cat("\nCluster size:\n") print(object$purified$size,digits) if(!is.null(object$purified$cor)) {cat("\nPurified scale intercorrelations\n reliabilities on diagonal\n correlations corrected for attenuation above diagonal: \n") print(object$purified$corrected,digits) } } , omega = { cat( object$title,"\n") print(object$Call) cat("Alpha: ",round(object$alpha,digits),"\n") cat("G.6: ",round(object$G6,digits),"\n") cat("Omega Hierarchical: " ,round(object$omega_h,digits),"\n") cat("Omega H asymptotic: " ,round(object$omega.lim,digits),"\n") cat("Omega Total " ,round(object$omega.tot,digits),"\n") numfactors <- dim(object$schmid$sl)[2] -3 eigenvalues <- diag(t(object$schmid$sl[,1:numfactors]) %*% object$schmid$sl[,1:numfactors]) cat("\nWith eigenvalues of:\n") print(eigenvalues,digits=2) maxmin <- max(eigenvalues[2:numfactors])/min(eigenvalues[2:numfactors]) gmax <- eigenvalues[1]/max(eigenvalues[2:numfactors]) # cat("\ngeneral/max " ,round(gmax,digits)," max/min = ",round(maxmin,digits),"\n") cat("The degrees of freedom for the model is",object$schmid$dof," and the fit was ",round(object$schmid$objective,digits),"\n") if(!is.na(object$schmid$n.obs)) {cat("The number of observations was ",object$schmid$n.obs, " with Chi Square = ",round(object$schmid$STATISTIC,digits), " with prob < ", round(object$schmid$PVAL,digits),"\n")} if(!is.null(object$stats$rms)) {cat("\nThe root mean square of the residuals is ", round(object$stats$rms,digits),"\n") } if(!is.null(object$stats$crms)) {cat("The df corrected root mean square of the residuals is ", round(object$stats$crms,digits),"\n") } if(!is.null(object$schmid$RMSEA)) {cat("\nRMSEA and the ",object$schmid$RMSEA[4] ,"confidence intervals are ",round(object$schmid$RMSEA[1:3],digits+1)) } if(!is.null(object$schmid$BIC)) {cat("\nBIC = ",round(object$schmid$BIC,digits))} if(!is.null(object$ECV)) cat("Explained Common Variance of the general factor = ", round(object$ECV,digits),"\n") cat("\n Total, General and Subset omega for each subset\n") colnames(object$omega.group) <- c("Omega total for total scores and subscales","Omega general for total scores and subscales ", "Omega group for total scores and subscales") print(round(t(object$omega.group),digits)) }, omegaSem = { object <- object$omegaSem cat( object$title,"\n") cat("Alpha: ",round(object$alpha,digits),"\n") cat("G.6: ",round(object$G6,digits),"\n") cat("Omega Hierarchical: " ,round(object$omega_h,digits),"\n") cat("Omega H asymptotic: " ,round(object$omega.lim,digits),"\n") cat("Omega Total " ,round(object$omega.tot,digits),"\n") numfactors <- dim(object$schmid$sl)[2] -3 eigenvalues <- diag(t(object$schmid$sl[,1:numfactors]) %*% object$schmid$sl[,1:numfactors]) cat("\nWith eigenvalues of:\n") print(eigenvalues,digits=2) maxmin <- max(eigenvalues[2:numfactors])/min(eigenvalues[2:numfactors]) gmax <- eigenvalues[1]/max(eigenvalues[2:numfactors]) # cat("\ngeneral/max " ,round(gmax,digits)," max/min = ",round(maxmin,digits),"\n") cat("The degrees of freedom for the model is",object$schmid$dof," and the fit was ",round(object$schmid$objective,digits),"\n") if(!is.na(object$schmid$n.obs)) {cat("The number of observations was ",object$schmid$n.obs, " with Chi Square = ",round(object$schmid$STATISTIC,digits), " with prob < ", round(object$schmid$PVAL,digits),"\n")} if(!is.null(object$stats$rms)) {cat("\nThe root mean square of the residuals is ", round(object$stats$rms,digits),"\n") } if(!is.null(object$stats$crms)) {cat("The df corrected root mean square of the residuals is ", round(object$stats$crms,digits),"\n") } if(!is.null(object$schmid$RMSEA)) {cat("\nRMSEA and the ",object$schmid$RMSEA[4] ,"confidence intervals are ",round(object$schmid$RMSEA[1:3],digits+1)) } if(!is.null(object$schmid$BIC)) {cat("\nBIC = ",round(object$schmid$BIC,digits))} if(!is.null(object$ECV)) cat("Explained Common Variance of the general factor = ", round(object$ECV,digits),"\n") cat("\n Total, General and Subset omega for each subset\n") colnames(object$omega.group) <- c("Omega total for total scores and subscales","Omega general for total scores and subscales ", "Omega group for total scores and subscales") print(round(t(object$omega.group),digits)) }, omegaDirect = { cat("Call: ") print(object$Call) cat("Omega H direct: " ,round(object$omega.g,digits),"\n") eigenvalues <- diag(t(object$loadings) %*% object$loadings) cat("\nWith eigenvalues of:\n") print(eigenvalues,digits=2) cat("The degrees of freedom for the model is",object$orth.f$dof," and the fit was ",round(object$orth.f$objective,digits),"\n") if(!is.na(object$orth.f$n.obs)) {cat("The number of observations was ",object$orth.f$n.obs, " with Chi Square = ",round(object$orth.f$STATISTIC,digits), " with prob < ", round(object$orth.f$PVAL,digits),"\n")} if(!is.null(object$orth.f$rms)) {cat("\nThe root mean square of the residuals is ", round(object$orth.f$rms,digits),"\n") } if(!is.null(object$orth.f$crms)) {cat("The df corrected root mean square of the residuals is ", round(object$orth.f$crms,digits),"\n") } if(!is.null(object$orth.f$RMSEA)) {cat("\nRMSEA and the ",object$orth.f$RMSEA[4] ,"confidence intervals are ",round(object$orth.f$RMSEA[1:3],digits+1)) } if(!is.null(object$orth.f$BIC)) {cat("\nBIC = ",round(object$orth.f$BIC,digits))} cat("\n Total, General and Subset omega for each subset\n") colnames(object$om.group) <- c("Omega total for total scores and subscales","Omega general for total scores and subscales ", "Omega group for total scores and subscales") print(round(t(object$om.group),digits)) }, overlap = { cat("Call: ") print(object$Call) cat("\nScale intercorrelations adjusted for item overlap") cat("\nScale intercorrelations corrected for attenuation \n raw correlations (corrected for overlap) below the diagonal, (standardized) alpha on the diagonal \n corrected (for overlap and reliability) correlations above the diagonal:\n") print(object$corrected,digits) result <- object$corrected }, scores = { #also score.items cat("Call: ") print(object$Call) if(object$raw) { cat("\nScale intercorrelations corrected for attenuation \n raw correlations below the diagonal, (unstandardized) alpha on the diagonal \n corrected correlations above the diagonal:\n") } else { cat("\nScale intercorrelations corrected for attenuation \n raw correlations below the diagonal, (standardized) alpha on the diagonal \n corrected correlations above the diagonal:\n") } print(object$corrected,digits) result <- object$corrected }, testRetest ={ cat("Call: ") print(object$Call) cat("Test-retest correlations and reliabilities\n") cat("Test retest correlation = " ,round(object$r12,digits)) cat("\n Alpha reliabilities for both time points \n") print(round(object$alpha,digits=digits)) cat("\n \n") }, vss = { if(object$title!="Very Simple Structure") { cat("\nVery Simple Structure of ", object$title,"\n") } else {cat("\nVery Simple Structure\n")} cat("VSS complexity 1 achieves a maximimum of ") vss.max <- round(max(object$cfit.1) ,digits) cat(vss.max," with " ,which.max(object$cfit.1), " factors\n") cat("VSS complexity 2 achieves a maximimum of ") vss.max <- round(max(object$cfit.2) ,digits) cat(vss.max," with " ,which.max(object$cfit.2), " factors\n") cat("\nThe Velicer MAP criterion achieves a minimum of ") vss.map <- round(max(object$map) ,digits) cat(vss.map," with " ,which.min(object$map), " factors\n ") }, cluster.cor = { cat("Call: ") print(object$Call) cat("\nScale intercorrelations corrected for attenuation \n raw correlations below the diagonal, (standardized) alpha on the diagonal \n corrected correlations above the diagonal:\n") print(object$corrected,digits) result <- object$corrected }, esem = { cat("\nExploratory Structural Equation Modeling with Call: ") print(object$Call) nfactors <- dim(object$loadings)[2] objective <- object$criteria[1] if(!is.null(objective)) { cat("\nTest of the hypothesis that", nfactors, if (nfactors == 1) "factor is" else "factors are", "sufficient.") cat("\nThe degrees of freedom for the model is",object$dof," and the objective function was ",round(objective,digits),"\n") if(!is.na(object$n.obs)) {cat("The number of observations was ",object$n.obs, " with Chi Square = ",round(object$STATISTIC,digits), " with prob < ", signif(object$PVAL,digits),"\n")} } if(!is.null(object$rms)) {cat("\nThe root mean square of the residuals (RMSA) is ", round(object$rms,digits),"\n") } if(!is.null(object$crms)) {cat("The df corrected root mean square of the residuals is ", round(object$crms,digits),"\n") } if(!is.null(object$TLI)) {cat("\nTucker Lewis Index of factoring reliability = ",round(object$TLI,digits+1))} if(!is.null(object$RMSEA)) {cat("\nRMSEA index = ",round(object$RMSEA[1],digits+1), " and the", (1- object$RMSEA[4])*100,"% confidence intervals are ",round(object$RMSEA[2:3],digits+1)) } if(!is.null(object$BIC)) {cat("\nBIC = ",round(object$BIC,digits))} if(!is.null(object$Phi)) { colnames(object$Phi) <- rownames(object$Phi) <- colnames(object$loadings) print(round(object$Phi,digits))} }, extend = {cat("\n Factor extensions analysis with Call: ") print(object$Call) nfactors <- dim(object$loadings)[2] if(!is.null(object$Phi)){cat ("\n With factor correlations of \n" ) colnames(object$Phi) <- rownames(object$Phi) <- colnames(object$loadings) print(round(object$Phi,digits))} }, fa = { cat("\nFactor analysis with Call: ") print(object$Call) nfactors <- dim(object$loadings)[2] objective <- object$criteria[1] if(!is.null(objective)) { cat("\nTest of the hypothesis that", nfactors, if (nfactors == 1) "factor is" else "factors are", "sufficient.") cat("\nThe degrees of freedom for the model is",object$dof," and the objective function was ",round(objective,digits),"\n") if(!is.na(object$n.obs)) {cat("The number of observations was ",object$n.obs, " with Chi Square = ",round(object$STATISTIC,digits), " with prob < ", signif(object$PVAL,digits),"\n")} } if(!is.null(object$rms)) {cat("\nThe root mean square of the residuals (RMSA) is ", round(object$rms,digits),"\n") } if(!is.null(object$crms)) {cat("The df corrected root mean square of the residuals is ", round(object$crms,digits),"\n") } if(!is.null(object$TLI)) {cat("\nTucker Lewis Index of factoring reliability = ",round(object$TLI,digits+1))} if(!is.null(object$RMSEA)) {cat("\nRMSEA index = ",round(object$RMSEA[1],digits+1), " and the", (1- object$RMSEA[4])*100,"% confidence intervals are ",round(object$RMSEA[2:3],digits+1)) } if(!is.null(object$BIC)) {cat("\nBIC = ",round(object$BIC,digits))} if(!is.null(object$Phi)) { if(object$fn == "principal") {cat ("\n With component correlations of \n" ) } else {cat ("\n With factor correlations of \n" )} colnames(object$Phi) <- rownames(object$Phi) <- colnames(object$loadings) print(round(object$Phi,digits))} }, faBy = {cat("\nFactor analysis within groups with Call: ") print(object$Call) cat("\nLoadings pooled across groups\n") print(object$mean.loading,digits=digits) cat("\nFactor correlations pooled across groups\n") print(object$mean.Phi,digits = digits) }, fa.reg = {cat("\nFactor analysis regression Call: ") print(object$Call) summary(object$regression) }, items= { if(object=="omega") { cat("\nSchmid Leiman Factor loadings:\n") print(object$schmid$sl) numfactors <- dim(object$schmid$sl)[2] -2 eigenvalues <- diag(t(object$schmid$sl[,1:numfactors]) %*% object$schmid$sl[,1:numfactors]) cat("\nWith eigenvalues of:\n") print(eigenvalues,digits=digits) } if(!is.null(object$item.cor) ) { cat("\nItem by scale correlations:\n") print(object$item.cor,digits) } if (!is.null(object$p.sorted$sorted)) { cat("\nItem by Cluster Structure matrix:\n") print(object$p.sorted$sorted,digits) } if (!is.null(object$purified$pattern)) { cat("\nItem by Cluster Pattern matrix:\n") print(object$purified$pattern,digits) } if(vss) { cat("\nVelicer MAP\n") print(object$map,digits) cat("\nVery Simple Structure Complexity 1\n") print(object$cfit.1,digits) cat("\nVery Simple Structure Complexity 2\n") print(object$cfit.2,digits) } }, #end if items alpha= { cat("\nReliability analysis ",object$title," \n") print(object$total,digits=digits) }, lmCor = { if(object$raw) {cat("\nMultiple Regression from raw data \n")} else { cat("\nMultiple Regression from matrix input \n")} print(object$Call) cat("\nMultiple Regression from matrix input \n") cat("\nBeta weights \n") print(object$coefficients,digits) cat("\nMultiple R \n") print(object$R,digits) cat("\nMultiple R2 \n") print(object$R2,digits) cat("\nCohen's set correlation R2 \n") print(object$Rset,digits) cat("\nSquared Canonical Correlations\n") print(object$cancor2,digits) }, irt.fa = { cat("\nItem Response Theory using factor analysis with Call: ") print(object$Call) nfactors <- dim(object$fa$loadings)[2] objective <- object$fa$criteria[1] if(!is.null(objective)) { cat("\nTest of the hypothesis that", nfactors, if (nfactors == 1) "factor is" else "factors are", "sufficient.") cat("\nThe degrees of freedom for the model is",object$fa$dof," and the objective function was ",round(objective,digits),"\n") if(!is.na(object$fa$n.obs)) {cat("The number of observations was ",object$fa$n.obs, " with Chi Square = ",round(object$fa$STATISTIC,digits), " with prob < ", signif(object$fa$PVAL,digits),"\n")} if(!is.null(object$fa$rms)) {cat("\nThe root mean square of the residuals (RMSA) is ", round(object$fa$rms,digits),"\n") } if(!is.null(object$fa$crms)) {cat("The df corrected root mean square of the residuals is ", round(object$fa$crms,digits),"\n") } if(!is.null(object$fa$TLI)) cat("\nTucker Lewis Index of factoring reliability = ",round(object$fa$TLI,digits+1))} if(!is.null(object$fa$RMSEA)) {cat("\nRMSEA index = ",round(object$fa$RMSEA[1],digits+1), " and the", (1- object$fa$RMSEA[4])*100,"% confidence intervals are ",round(object$fa$RMSEA[2:3],digits+1)) } if(!is.null(object$fa$BIC)) {cat("\nBIC = ",round(object$fa$BIC,digits))} if(!is.null(object$fa$Phi)) { if(object$fa$fn == "principal") {cat ("\n With component correlations of \n" ) } else {cat ("\n With factor correlations of \n" )} colnames(object$fa$Phi) <- rownames(object$fa$Phi) <- colnames(object$fa$loadings) print(round(object$fa$Phi,digits))} }, cohen.d = {cat("Extract effect sizes from cohen.d\n") print(object$Call) cat("\nMultivariate (Mahalanobis) distance between groups", round(object$M.dist,digits=digits)) cat("\n r equivalent for each variable\n") print(round(object$r,digits=digits)) }, cohen.d.by = {cat("Extract effect sizes by groups from cohen.d.by\n") ncases <- length(object) effects <- list() Md <- rep(NA,ncases) for (i in (1:ncases)) { effects[i] <- list(object[[i]]$cohen.d[,2])} effect.df <- data.frame(matrix(unlist(effects),nrow=ncases,byrow=TRUE)) for(i in(1:ncases)){Md[i] <- object[[i]]$M.dist} colnames(effect.df) <- rownames(object[[1]]$cohen.d) effect.df <- cbind(effect.df,Md) rownames(effect.df) <- names(object) print(effect.df,digits=digits) }, mediate = {summary_psych.mediate(object,digits=digits) }, describeData = { cat('n.obs = ', object$n.obs, "of which ", object$complete.cases," are complete cases. Number of variables = ",object$nvar," of which all are numeric is ",object$all.numeric,"\n")}, none = {warning("I am sorry, I do not have a summary function for this object")} ) #end of switch if( items) { if(object=="omega") { cat("\nSchmid Leiman Factor loadings:\n") print(object$schmid$sl) numfactors <- dim(object$schmid$sl)[2] -2 eigenvalues <- diag(t(object$schmid$sl[,1:numfactors]) %*% object$schmid$sl[,1:numfactors]) cat("\nWith eigenvalues of:\n") print(eigenvalues,digits=digits) } if(!is.null(object$item.cor) ) { cat("\nItem by scale correlations:\n") print(object$item.cor,digits) } if (!is.null(object$p.sorted$sorted)) { cat("\nItem by Cluster Structure matrix:\n") print(object$p.sorted$sorted,digits) } if (!is.null(object$purified$pattern)) { cat("\nItem by Cluster Pattern matrix:\n") print(object$purified$pattern,digits) } if(vss) { cat("\nVelicer MAP\n") print(object$map,digits) cat("\nVery Simple Structure Complexity 1\n") print(object$cfit.1,digits) cat("\nVery Simple Structure Complexity 2\n") print(object$cfit.2,digits) } } #end if items #invisible(result) } # psych/R/cortest.R0000644000176200001440000000532314264563533013425 0ustar liggesusers"cortest" <- function(R1,R2=NULL, n1=NULL,n2=NULL,fisher=TRUE,cor=TRUE, method = "pearson",use="pairwise") { cl <- match.call() if ((dim(R1)[1] != dim(R1)[2]) & cor) {n1 <- dim(R1)[1] # message("R1 was not square, finding R from data") R1 <- cor(R1,use=use,method=method)} if(!is.matrix(R1) ) R1 <- as.matrix(R1) #converts data.frames to matrices if needed p <- dim(R1)[2] if(is.null(n1)) {n1 <- 100 warning("n not specified, 100 used") } if(is.null(R2)) { if(fisher) {R <- 0.5*log((1+R1)/(1-R1)) R2 <- R*R} else {R2 <- R1*R1} if(cor) {diag(R2) <- 0 E <- (sum(R2*lower.tri(R2))) z <- sum(R*lower.tri(R)) df <- p*(p-1)/2} else { E <- sum(R2) z <- sum(R1) df <- ncol(R1) * nrow(R1)} chisq <- E *(n1-3) z <- z /sqrt(n1-3) p.val <- pchisq(chisq,df,lower.tail=FALSE) } else { #end of 1 matrix test if ((dim(R2)[1] != dim(R2)[2]) & cor) {n2 <- dim(R2)[1] message("R2 was not square, finding R from data") R2 <- cor(R2,use=use, method=method)} if(!is.matrix(R2) ) R2 <- as.matrix(R2) if(fisher) { R1 <- 0.5*log((1+R1)/(1-R1)) R2 <- 0.5*log((1+R2)/(1-R2)) if(cor) {diag(R1) <- 0 diag(R2) <- 0} } R <- R1 - R2 #direct difference R2 <- R*R if(is.null(n2)) n2 <- n1 n <- (n1*n2)/(n1+n2) #1/2 harmonic sample size if(cor) { E <- (sum(R2*lower.tri(R2))) #just count the lower diagonal elements chisq <- E *(n-3) df <- p*(p-1)/2 z <- sum(R2*lower.tri(R2)) / sqrt(n-3)} else {E <- sum(R2) chisq <- E * (n-3) df <- ncol(R2) * nrow(R2) z <- sum(R2) / sqrt(n-3)} p.val <- pchisq(chisq,df,lower.tail=FALSE) } if (is.null(n2) ) z <- NULL result <- list(chi2=chisq,prob=p.val,df=df,z=z,Call=cl) class(result) <- c("psych","cortest") return(result) } #version of June 25, 2008 #revised October 12, 2011 to allow non-square matrices test.cortest <- function(R=NULL,n.var=10,n1=100,n2=1000,n.iter=1000) { if(is.null(R)) R <- diag(1,n.var) summary <- list() for(i in 1:n.iter) { x <- sim.correlation(R,n1) if(n2 >3 ) { y <- sim.correlation(R,n2) summary[[i]] <- cortest(x,y,n1=n1,n2=n2)$prob } else {summary[[i]] <- cortest(x,n1=n1)$prob } } result <- unlist(summary) return(result) } psych/R/polar.R0000644000176200001440000000224713501503363013045 0ustar liggesusers#polar a function to convert a factor loadings matrix to polar coordinates #Version of Sept 9, 2007 #Slightly revised July 25, 2009 #corrected June 16, 2019 to take square roots of communalities "polar" <- function(f,sort=TRUE) #return all pairwise polar coordinates { if (!is.matrix(f) && !is.data.frame(f) ) {fload <-f$loadings} else {fload <- f} nf <- dim(fload)[2] n.var <- dim(fload)[1] polar <- matrix(0,nrow=n.var,ncol = nf * (nf-1)+1) if (!is.null(rownames(fload))) {rownames(polar) <- rownames(fload)} else {rownames(polar) <- paste("v",1:n.var,sep="") } colnames(polar) <- rep(NA,nf*(nf-1)+1) #just give it something to play with polar[,1] <- seq(1:n.var) colnames(polar)[1] <- "Var" k <- 2 kk <- nf*(nf-1)/2 for (i in 2:nf) { for (j in 1:(i-1)) { vector.length <- sqrt(fload[,i]^2 + fload[,j]^2) theta=sign(fload[,i])*180*acos(fload[,j]/(vector.length))/pi #vector angle (-180: 180) polar[,k] <- theta %% 360 polar[,k+kk] <- vector.length colnames(polar)[k] <- paste("theta",i,j,sep="") colnames(polar)[k+kk] <- paste("vecl",i ,j,sep="") k <- k + 1 } } if (sort) {polar <- polar[order(polar[,2]),] } return(polar) } psych/R/factor2cluster.R0000644000176200001440000000167014472015531014674 0ustar liggesusers"factor2cluster" <- function (loads,cut=.0,aslist=FALSE) { if (!is.matrix(loads) ) {l <-loads$loadings} else {l <- loads} l <- as.matrix(l) nrows <- dim(l)[1] ncols <- dim(l)[2] if (ncols ==1) {m1 <- matrix(rep(1,nrows),ncol=1) } else { m1 <- matrix(apply(t(apply(l, 1, abs)), 1, which.max), ncol = 1)} id <- matrix(c(1:nrows, m1), ncol = 2) #index row and column id[is.na(id)] <- NA #actually looking for NAN factor2cluster <- matrix(rep(0, ncols * nrows), ncol = ncols) factor2cluster[id] <- sign(l[id])*( (abs(l[id]) >cut)+0) #only loadings > cut rownames(factor2cluster) <- rownames(l) colnames(factor2cluster) <- colnames(l) nitems <- colSums(abs(factor2cluster)) for (i in ncols:1) {if (nitems[i]<1) {factor2cluster <- factor2cluster[,-i,drop=FALSE]} }#remove columns with no variables if(aslist) factor2cluster <- keys2list(factor2cluster) return(factor2cluster) } psych/R/scoreFast.r0000644000176200001440000001631414075121315013721 0ustar liggesusers#created 7/12/16 #just score items without a lot of stats #basically scoreItems with all the stats removed\ #Parallelized July 28, 2018 and report the number of responses/scale #added the "mollycoddle" feature March 19, 2019 to help the clueless user "scoreFast" <- function (keys,items,totals=FALSE,ilabels=NULL, missing=TRUE, impute="none",delete=TRUE, min=NULL,max=NULL,count.responses=FALSE,digits=2) { smallFunction <- function(scale,keys) { if(is.null(keys)) return(NULL) pos.item <- items[,which(keys[,scale] > 0)] neg.item <- items[,which(keys[,scale] < 0)] neg.item <- max + min - neg.item sub.item <- cbind(pos.item,neg.item) if(count.responses) rs <- rowSums(!is.na(sub.item)) if(totals) { scores <- rowSums(sub.item,na.rm=TRUE)} else {scores <- rowMeans(sub.item,na.rm=TRUE) } if(count.responses) {return(c(scores,rs))} else {return(scores) } } cl <- match.call() if(is.data.frame(keys)) stop("I think you reversed keys and items. I am stopping") raw.data <- TRUE if(impute == FALSE) impute <- "none" if(is.list(keys)) {select <- sub("-","",unlist(keys)) select <- select[!duplicated(select)] select <- select[!is.na(select)] #check for bad input -- the Mollycoddle option if(any( !(select %in% colnames(items)) )) { cat("\nVariable names are incorrect. Offending items are ", select[which(!(select %in% colnames(items)))],"\n") stop("Improper input. See above. ")} } else { keys <- keys2list(keys) select <- selectFromKeyslist(colnames(items),keys) select <- select[!duplicated(select)] select <- select[!is.na(select)]} #added 11/23/18 items <- items[,select,drop=FALSE] keynames <- colnames(keys) keys <- make.keys(items,keys) #added 9/9/16 keys <- as.matrix(keys) #just in case they were not matrices to start with n.keys <- dim(keys)[2] n.items <- dim(keys)[1] abskeys <- abs(keys) keynames <- colnames(keys) num.item <- diag(t(abskeys) %*% abskeys) #how many items in each scale num.ob.item <- num.item #will be adjusted in case of impute = FALSE if (!missing) items <- na.omit(items) n.subjects <- dim(items)[1] items <- as.matrix(items) # response.freq <- response.frequencies(items) item.var <- apply(items,2,sd,na.rm=TRUE) bad <- which((item.var==0)|is.na(item.var)) if((length(bad) > 0) && delete) { for (baddy in 1:length(bad)) {warning( "Item= ",colnames(items)[bad][baddy] , " had no variance and was deleted from the data and the keys.")} items <- items[,-bad] keys <- as.matrix(keys[-bad,]) n.items <- n.items - length(bad) abskeys <- abs(keys) colnames(keys) <- keynames } item.means <- colMeans(items,na.rm=TRUE) if (is.null(min)) {min <- min(items,na.rm=TRUE)} if (is.null(max)) {max <- max(items,na.rm=TRUE)} # miss.rep <- rowSums(is.na(items)) miss.rep <- (is.na(items) +0) %*% abs(keys) num.item <- diag(t(abskeys) %*% abskeys) #how many items in each scale num.ob.item <- num.item #will be adjusted in case of impute = FALSE if(impute !="none") { miss <- which(is.na(items),arr.ind=TRUE) if(impute=="mean") { item.means <- colMeans(items,na.rm=TRUE) #replace missing values with means items[miss]<- item.means[miss[,2]]} else { item.med <- apply(items,2,median,na.rm=TRUE) #replace missing with medians items[miss]<- item.med[miss[,2]]} #this only works if items is a matrix scores <- items %*% keys #this actually does all the work but doesn't handle missing values } else { #handle the case of missing data without imputation scores <- matrix(NaN,ncol=n.keys,nrow=n.subjects) scoresList <- mcmapply(smallFunction,c(1:n.keys),MoreArgs=list(keys=keys)) #the parallelized function } if (is.null(ilabels)) { if (totals) {#ilabels<- paste("S",1:n.keys,sep="")} else { #ilabels <- paste("A",1:n.keys,sep="")} } ilabels<- paste(keynames,"S",sep="-")} else { ilabels <- paste(keynames,"A",sep="-")} } if(count.responses) { scores <- scoresList[1:n.subjects,,drop=FALSE] #added drop 11/14/20 responses <- scoresList[(n.subjects+1):nrow(scoresList),,drop=FALSE] colnames(scores) <- ilabels colnames(responses) <- ilabels results <- list(scores=scores,responses = responses)} else { scores <- scoresList colnames(scores) <- ilabels results <- scores} #class(results) <- c("psych", "score.items") return(results) } #created July 27, 2018 "scoreVeryFast" <- function(keys,items,totals=FALSE, min=NULL,max=NULL,count.responses=FALSE) { #just scores by addition, no imputation, nothing fancy if(is.data.frame(keys)) stop("I think you reversed keys and items. I am stopping") #use this for parallelism smallFunction <- function(scale,keys) { pos.item <- items[,which(keys[,scale] > 0)] neg.item <- items[,which(keys[,scale] < 0)] neg.item <- max + min - neg.item sub.item <- cbind(pos.item,neg.item) if(count.responses) rs <- rowSums(!is.na(sub.item)) if(totals) { scores <- rowSums(sub.item,na.rm=TRUE)} else {scores <- rowMeans(sub.item,na.rm=TRUE) } if(count.responses) {return(c(scores,rs))} else {return(scores) } } if(is.list(keys)) {select <- sub("-","",unlist(keys)) select <- select[!duplicated(select)] if(any( !(select %in% colnames(items)) )) { cat("\nVariable names are incorrect. Offending items are ", select[which(!(select %in% colnames(items)))],"\n") stop("Improper input. See above. ")} } else { keys <- keys2list(keys) select <- selectFromKeyslist(colnames(items),keys) select <- select[!duplicated(select)]} items <- items[,select,drop=FALSE] n.subjects <- NROW(items) keys <- make.keys(items,keys) #added 9/9/16 keys <- as.matrix(keys) #just in case they were not matrices to start with n.keys <- dim(keys)[2] n.items <- dim(keys)[1] abskeys <- abs(keys) keynames <- colnames(keys) if(is.null(keynames)) {if (totals) {keynames<- paste("S",1:n.keys,sep="")} else { keynames <- paste("A",1:n.keys,sep="")} } num.item <- diag(t(abskeys) %*% abskeys) #how many items in each scale n.subjects <- dim(items)[1] items <- as.matrix(items) scores <- matrix(NaN,ncol=n.keys,nrow=n.subjects) if (is.null(min)) {min <- min(items,na.rm=TRUE)} if (is.null(max)) {max <- max(items,na.rm=TRUE)} #use mapply for debugging, mcmapply for parallel processing #items are global and not passed scoresList <- mcmapply(smallFunction,c(1:n.keys),MoreArgs=list(keys=keys)) #the parallelized function if(count.responses) { scores <- scoresList[1:n.subjects,] responses <- scoresList[(n.subjects+1):nrow(scoresList),] colnames(scores) <- keynames colnames(responses) <- keynames results <- list(scores=scores,responses = responses)} else { scores <- scoresList colnames(scores) <- keynames results <- scores} return(results) } psych/R/fa.rgraph.R0000644000176200001440000001163413144646265013614 0ustar liggesusers#Created May 24, 2007 #modifed April 12, 2008 to allow figures from matrices that are not loadings #take the output from a factor analysis and graph it using rgraphviz #Except for the ability to write a dot file, this has been replaced by fa.diagram to avoid using Rgraphviz (September, 2009) "fa.rgraph" <- function(fa.results,out.file=NULL,labels=NULL,cut=.3,simple=TRUE, size=c(8,6), node.font=c("Helvetica", 14), edge.font=c("Helvetica", 10), rank.direction=c("RL","TB","LR","BT"), digits=1,main="Factor Analysis",graphviz=TRUE, ...){ if (!requireNamespace('Rgraphviz')) {stop("I am sorry, you need to have loaded the Rgraphviz package") #create several dummy functions to get around the "no visible global function definition" problem nodes <- function() {} addEdge <- function() {} subGraph <- function(){} } Phi <- NULL #the default case if((!is.matrix(fa.results)) && (!is.data.frame(fa.results))) {factors <- as.matrix(fa.results$loadings) if(!is.null(fa.results$Phi)) Phi <- fa.results$Phi} else {factors <- fa.results} rank.direction <- match.arg(rank.direction) #first some basic setup parameters num.var <- dim(factors)[1] #how many variables? if (is.null(num.var) ){num.var <- length(factors) num.factors <- 1} else { num.factors <- dim(factors)[2]} if (simple) {k=1} else {k <- num.factors} vars <- paste("V",1:num.var,sep="") fact <- paste("F",1:num.factors,sep="") clust.graph <- new("graphNEL",nodes=c(vars,fact),edgemode="directed") graph.shape <- c(rep("box",num.var),rep("ellipse",num.factors)) graph.rank <- c(rep("sink",num.var),rep("min",num.factors)) names(graph.shape) <- nodes(clust.graph) names(graph.rank) <- nodes(clust.graph) edge.label <- rep("",num.var*k) edge.name <- rep("",num.var*k) names(edge.label) <- seq(1:num.var*k) edge.dir <- rep("forward",num.var*k) #show the cluster structure with ellipses l <- factors if (num.factors ==1) { for (i in 1:num.var) { clust.graph <- addEdge(fact[1], vars[i], clust.graph,1) edge.label[i] <- round(factors[i],digits) edge.name[i] <- paste(fact[1],"~",vars[i],sep="") } } else { if(simple){ #very simple structure is one loading per variable m1 <- matrix(apply(t(apply(l, 1, abs)), 1, which.max), ncol = 1) for (i in 1:num.var) {clust.graph <- addEdge(fact[m1[i]], vars[i], clust.graph,1) edge.label[i] <- round(factors[i,m1[i]],digits) edge.name[i] <- paste(fact[m1[i]],"~",vars[i],sep="") } } else { #all loadings > cut in absolute value k <- 1 for (i in 1:num.var) { for (f in 1:num.factors) { if (abs(factors[i,f]) > cut) {clust.graph <- addEdge(fact[f], vars[i], clust.graph,1) edge.label[k] <- round(factors[i,f],digits) edge.name[k] <- paste(fact[f],"~",vars[i],sep="") k <- k+1 } #end of if } #end of factor } # end of variable loop } #end of if simple else } #end of if num.factors ==1 if(!is.null(Phi)) { k <- num.var +1 for (f in 2:num.factors) { for (f1 in 1:(f-1)) { if(Phi[f,f1] > cut) { clust.graph <- addEdge(fact[f1], fact[f], clust.graph,1) edge.label[k] <- round(Phi[f,f1],digits) edge.name[k] <- paste(fact[f1],"~",fact[f],sep="") edge.dir[k] <- paste("both") k <- k+1} } } } nAttrs <- list() #node attributes eAttrs <- list() #edge attributes if (!is.null(labels)) {var.labels <- c(labels,fact) names(var.labels) <- nodes(clust.graph) nAttrs$label <- var.labels names(edge.label) <- edge.name } names(edge.label) <- edge.name names(edge.dir) <- edge.name nAttrs$shape <- graph.shape nAttrs$rank <- graph.rank eAttrs$label <- edge.label eAttrs$dir <- edge.dir #eAttrs$font <- edge.font attrs <- list(node = list(shape = "ellipse", fixedsize = FALSE),graph=list(rankdir=rank.direction, fontsize=edge.font[2],bgcolor="white" )) obs.var <- subGraph(vars,clust.graph) cluster.vars <- subGraph(fact,clust.graph) observed <- list(list(graph=obs.var,cluster=TRUE,attrs=c(rank="sink")),list(graph=cluster.vars,cluster=FALSE ,attrs=c(rank = "source"))) #this crashes for correlated factors solution observed <- list(list(graph=obs.var,cluster=TRUE,attrs=c(rank="sink"))) #this does not lead to a crash if(!is.null(out.file) ){toDotty(clust.graph,out.file,nodeAttrs = nAttrs, edgeAttrs = eAttrs, attrs = attrs) } plot(clust.graph, nodeAttrs = nAttrs, edgeAttrs = eAttrs, attrs = attrs,subGList=observed,main=main) return(clust.graph) } psych/R/phi2poly.R0000644000176200001440000000373712267015244013507 0ustar liggesusers"phi2poly" <- function(ph,cp,cc,n=NULL,correct=TRUE) { #.Deprecated(phi2tetra, msg='phi2poly is deprecated, please use phi2tetra') #ph is the phi coefficient #cp is the selection ratio of the predictor #cc is the success rate of the criterion r.marg<-rep(0,2) c.marg<- rep(0,2) p<-array(rep(0,4),dim=c(2,2)) r.marg[1]<- cp r.marg[2]<- 1 -cp c.marg[1]<- cc c.marg[2]<- 1-cc p[1,1]<- r.marg[1]*c.marg[1]+ ph*sqrt(prod(r.marg,c.marg)) p[2,2]<- r.marg[2]*c.marg[2]+ ph*sqrt(prod(r.marg,c.marg)) p[1,2]<- r.marg[1]*c.marg[2]- ph*sqrt(prod(r.marg,c.marg)) p[2,1]<- r.marg[2]*c.marg[1]- ph*sqrt(prod(r.marg,c.marg)) if(!is.null(n)) p <- p*n result<-tetrachoric(p,correct=correct )$rho return(result)} "phi2tet" <- function(ph,cp,cc,n=NULL,correct=TRUE) { if(is.null(n)) n <- 1 #ph is the phi coefficient #cp is the selection ratio of the predictor #cc is the success rate of the criterion r.marg<-rep(0,2) c.marg<- rep(0,2) p<-array(rep(0,4),dim=c(2,2)) r.marg[1]<- cp/n r.marg[2]<- 1 -cp/n c.marg[1]<- cc/n c.marg[2]<- 1-cc/n p[1,1]<- r.marg[1]*c.marg[1]+ ph*sqrt(prod(r.marg,c.marg)) p[2,2]<- r.marg[2]*c.marg[2]+ ph*sqrt(prod(r.marg,c.marg)) p[1,2]<- r.marg[1]*c.marg[2]- ph*sqrt(prod(r.marg,c.marg)) p[2,1]<- r.marg[2]*c.marg[1]- ph*sqrt(prod(r.marg,c.marg)) if(!is.null(n)) p <- p*n result<-tetrachoric(p,correct=correct )$rho return(result)} "phi2tetra" <- function(ph,m,n=NULL,correct=TRUE) { if(!is.matrix(ph) && !is.data.frame(ph)) {result <- phi2tet(ph,m[1],m[2],n=n,correct=correct) } else { nvar <- nrow(ph) if(nvar !=ncol(ph)) {stop('Matrix must be square')} if (length(m) !=nvar) {stop("length of m must match the number of variables")} result <- as.matrix(ph) for(i in 2:nvar) { for (j in 1:(i-1)) { result[i,j] <- result[j,i] <- phi2tet(ph[i,j],m[i],m[j],n=n,correct=correct) } } } return(result) } psych/R/dia.cone.R0000644000176200001440000000223211764736465013426 0ustar liggesusers"dia.cone" <- function(x=0, y=-2, theta=45, arrow=TRUE,curves=TRUE,add=FALSE,labels=NULL,xlim = c(-1, 1), ylim=c(-1,1),... ) { segments = 51 extend = 1.1 xrange=2 #yrange=1 height= xrange xs <- tan(theta*pi/180) * height ys =.3 * xs angles <- (0:segments) * 2 * pi/segments unit.circle <- cbind(cos(angles), sin(angles)) ellipse <- unit.circle ellipse[, 1] <- ellipse[, 1] * xs + x ellipse[, 2] <- ellipse[, 2] * ys + y + height if(!add) {plot.new() plot.window(xlim=xlim*2,ylim=ylim,...)} lines(ellipse, ...) if(arrow) { arrows(x,y,(x-xs),y+ height,lty="dashed") arrows(x,y,(x + xs),y+ height,lty="dashed") arrows(x,y,x,y + extend^2 * height)} else { #don't draw arrows, just the cone coords <- matrix(c(x,x-xs,y,y+height),2,2) lines(coords) coords <- matrix(c(x,x+xs,y,y+height),2,2) lines(coords)} if(curves) {dia.curve(c(x,y+height/3),c(x-xs/3,y+height/3),scale=.2,labels=labels[1]) dia.curve(c(x,y+height/3),c(x+xs/3,y+height/3),scale=.2,labels=labels[2]) dia.curve(c(x-xs/2,y+height/2),c(x+xs/2,y+height/2),scale=.3,labels=labels[3]) } } psych/R/fa.R0000744000176200001440000012106314313663277012332 0ustar liggesusers#a function to do principal axis, minres, weighted least squares and maximimum likelihood factor analysis #basically, just combining the three separate functions #the code for wls and minres is adapted from the factanal function #the optimization function in ml is taken almost directly from the factanal function #created May 28, 2009 #modified June 7, 2009 to add gls fitting #modified June 24, 2009 to add ml fitting #modified March 4, 2010 to allow for factoring of covariance matrices rather than correlation matrices #this itself is straight foward, but the summary stats need to be worked on #modified April 4, 2011 to allow for factor scores of oblique or orthogonal solution #In May, 2011, fa was added as a wrapper to do iterations, and the original fa function was changed to fac. The functionality of fa has not changed. #Revised November, 2012 to add the minchi option for factoring. This minimizes the sample size weighted residual matrix #Revised 1/2/14 to add mclapply (multicore) feature. Increase in speed is 50\% for two cores, but only 63\% for 4 cores or 76\% for 8 cores #dropped the fisherz transform on loadings and phis #6/12/14 Added the ability to find tetrachorics, polychorics, or mixed cors. #15/1/15 Fixed the way we handle missing and imputation to actually work. #19/1/15 modified calls to rotation functions to meet CRAN specs using nameSpace "fa" <- function(r,nfactors=1,n.obs = NA,n.iter=1,rotate="oblimin",scores="regression", residuals=FALSE,SMC=TRUE,covar=FALSE,missing=FALSE,impute="none", min.err = .001,max.iter=50,symmetric=TRUE,warnings=TRUE,fm="minres",alpha=.1, p =.05,oblique.scores=FALSE,np.obs=NULL,use="pairwise",cor="cor",correct=.5,weight=NULL,n.rotations=1,hyper=.15,smooth=TRUE, ...) { cl <- match.call() if(isCorrelation(r)) {if(is.na(n.obs) && (n.iter >1)) stop("You must specify the number of subjects if giving a correlation matrix and doing confidence intervals") if(length(class(r)) > 1) { if( inherits(r,"partial.r")) class(r) <- c("matrix","array") } #in case we are using partial.r of class psych and partial.r added 4/10/21 } f <- fac(r=r,nfactors=nfactors,n.obs=n.obs,rotate=rotate,scores=scores,residuals=residuals,SMC = SMC, covar=covar, missing=missing, impute=impute, min.err=min.err, max.iter=max.iter, symmetric=symmetric, warnings=warnings, fm=fm, alpha=alpha, oblique.scores=oblique.scores, np.obs=np.obs,use=use, cor=cor, correct=correct, weight=weight, n.rotations=n.rotations, hyper=hyper,smooth=smooth, ...=...) #call fa with the appropriate parameters fl <- f$loadings #this is the original nvar <- dim(fl)[1] if(n.iter > 1) { if(is.na(n.obs) ) {n.obs <- f$n.obs} replicates <- list() rep.rots <- list() replicateslist <- parallel::mclapply(1:n.iter,function(x) { #replicateslist <- lapply(1:n.iter,function(x) { if(isCorrelation(r)) {#create data sampled from multivariate normal with observed correlation mu <- rep(0, nvar) #X <- mvrnorm(n = n.obs, mu, Sigma = r, tol = 1e-06, empirical = FALSE) #the next 3 lines replaces mvrnorm (taken from mvrnorm, but without the checks) eX <- eigen(r) X <- matrix(rnorm(nvar * n.obs),n.obs) X <- t(eX$vectors %*% diag(sqrt(pmax(eX$values, 0)), nvar) %*% t(X)) } else {X <- r[sample(n.obs,n.obs,replace=TRUE),]} fs <- fac(X,nfactors=nfactors,rotate=rotate,scores="none",SMC = SMC,missing=missing,impute=impute,min.err=min.err,max.iter=max.iter,symmetric=symmetric,warnings=warnings,fm=fm,alpha=alpha,oblique.scores=oblique.scores,np.obs=np.obs,use=use,cor=cor,correct=correct,n.rotations=n.rotations,hyper=hyper,smooth=smooth, ...=...) #call fa with the appropriate parameters if(nfactors == 1) {replicates <- list(loadings=fs$loadings)} else { t.rot <- target.rot(fs$loadings,fl) if(!is.null(fs$Phi)) { phis <- fs$Phi # should we rotate the simulated factor correlations? #we should report the target rotated phis, not the untarget rotated phis replicates <- list(loadings=t.rot$loadings,phis=phis[lower.tri(t.rot$Phi)]) #corrected 6/10/15 #replicates <- list(loadings=t.rot$loadings,phis=phis[lower.tri(phis)]) } else {replicates <- list(loadings=t.rot$loadings)} }}) replicates <- matrix(unlist(replicateslist),nrow=n.iter,byrow=TRUE) means <- colMeans(replicates,na.rm=TRUE) sds <- apply(replicates,2,sd,na.rm=TRUE) if(length(means) > (nvar * nfactors) ) { means.rot <- means[(nvar*nfactors +1):length(means)] sds.rot <- sds[(nvar*nfactors +1):length(means)] ci.rot.lower <- means.rot + qnorm(p/2) * sds.rot ci.rot.upper <- means.rot + qnorm(1-p/2) * sds.rot ci.rot <- data.frame(lower=ci.rot.lower,upper=ci.rot.upper) } else { rep.rots <- NULL means.rot <- NULL sds.rot <- NULL z.rot <- NULL ci.rot <- NULL } means <- matrix(means[1:(nvar*nfactors)],ncol=nfactors) sds <- matrix(sds[1:(nvar*nfactors)],ncol=nfactors) tci <- abs(means)/sds ptci <- 1-pnorm(tci) if(!is.null(rep.rots)) { tcirot <- abs(means.rot)/sds.rot ptcirot <- 1- pnorm(tcirot)} else {tcirot <- NULL ptcirot <- NULL} ci.lower <- means + qnorm(p/2) * sds ci.upper <- means + qnorm(1-p/2) * sds ci <- data.frame(lower = ci.lower,upper=ci.upper) class(means) <- "loadings" colnames(means) <- colnames(sds) <- colnames(fl) rownames(means) <- rownames(sds) <- rownames(fl) f$cis <- list(means = means,sds = sds,ci = ci,p =2*ptci, means.rot=means.rot,sds.rot=sds.rot,ci.rot=ci.rot,p.rot = ptcirot,Call= cl,replicates=replicates,rep.rots=rep.rots) results <- f results$Call <- cl class(results) <- c("psych","fa.ci") } else {results <- f results$Call <- cl class(results) <- c("psych","fa") } return(results) } #written May 1 2011 #modified May 8, 2014 to make cis an object in f to make sorting easier ######################################################## #the main function "fac" <- function(r,nfactors=1,n.obs = NA,rotate="oblimin",scores="tenBerge",residuals=FALSE,SMC=TRUE,covar=FALSE,missing=FALSE,impute="median", min.err = .001,max.iter=50,symmetric=TRUE,warnings=TRUE,fm="minres",alpha=.1,oblique.scores=FALSE,np.obs=NULL, use="pairwise",cor="cor", correct=.5,weight=NULL, n.rotations=1, hyper=.15, smooth=TRUE, ...) { cl <- match.call() control <- NULL #if you want all the options of mle, then use factanal ##first some functions that are internal to fa #this does the WLS or ULS fitting depending upon fm "fit.residuals" <- function(Psi,S,nf,S.inv=NULL,fm) { diag(S) <- 1- Psi if(!is.null(S.inv)) sd.inv <- diag(1/diag(S.inv)) eigens <- eigen(S) eigens$values[eigens$values < .Machine$double.eps] <- 100 * .Machine$double.eps if(nf >1 ) {loadings <- eigens$vectors[,1:nf] %*% diag(sqrt(eigens$values[1:nf])) } else {loadings <- eigens$vectors[,1] * sqrt(eigens$values[1] ) } model <- loadings %*% t(loadings) #use switch to clean up the code switch(fm, wls = {residual <- sd.inv %*% (S- model)^2 %*% sd.inv}, gls = {residual <- (S.inv %*%(S - model))^2 } , uls = {residual <- (S - model)^2}, # minres = {residual <- (S - model)^2 # diag(residual) <- 0}, ols = {residual <- (S-model) residual <- residual[lower.tri(residual)] residual <- residual^2}, minres = {residual <- (S-model) residual <- residual[lower.tri(residual)] residual <- residual^2}, old.min = {residual <- (S-model) residual <- residual[lower.tri(residual)] residual <- residual^2}, minchi = {residual <- (S - model)^2 #min chi does a minimum residual analysis, but weights the residuals by their pairwise sample size residual <- residual * np.obs diag(residual) <- 0 }) # #weighted least squares weights by the importance of each variable # if(fm == "wls" ) {residual <- sd.inv %*% (S- model)^2 %*% sd.inv} else {if (fm=="gls") {residual <- (S.inv %*%(S - model))^2 } else {residual <- (S - model)^2 #this last is the uls case # if(fm == "minres") {diag(residual) <- 0} #this is minimum residual factor analysis, ignore the diagonal # if(fm=="minchi") {residual <- residual * np.obs # diag(residual) <- 0 } #min chi does a minimum residual analysis, but weights the residuals by their pairwise sample size # }} # the uls solution usually seems better than wls or gls? # # error <- sum(residual) } #this next section is taken (with minor modification to make ULS, WLS or GLS) from factanal #it has been further modified with suggestions by Hao Wu to improve the ols/minres solution (Apri, 2017) #it does the iterative calls to fit.residuals #modified June 7, 2009 to add gls fits #Modified December 11, 2009 to use first derivatives from formula rather than empirical. This seriously improves the speed. #but does not seem to improve the accuracy of the minres/ols solution (note added April, 2017) "fit" <- function(S,nf,fm,covar) { if(is.logical(SMC)) {S.smc <- smc(S,covar)} else{ S.smc <- SMC } #added this option, August 31, 2017 upper <- max(S.smc,1) #Added Sept 11,2018 to handle case of covar , adjusted October 24 by adding 1 if((fm=="wls") | (fm =="gls") ) {S.inv <- solve(S)} else {S.inv <- NULL} if(!covar &&(sum(S.smc) == nf) && (nf > 1)) {start <- rep(.5,nf)} else {start <- diag(S)- S.smc} #initial communality estimates are variance - smc unless smc = 1 if(fm=="ml" || fm=="mle" ) {res <- optim(start, FAfn, FAgr, method = "L-BFGS-B", lower = .005, upper = upper, control = c(list(fnscale=1, parscale = rep(0.01, length(start))), control), nf = nf, S = S) } else { if(fm=="ols" ) { #don't use a gradient if(is.logical(SMC)) {start <- diag(S)- smc(S,covar)} else {start <- SMC} #added covar 9/11/18 res <- optim(start, FA.OLS, method = "L-BFGS-B", lower = .005, upper = upper, control = c(list(fnscale = 1, parscale = rep(0.01, length(start)))), nf= nf, S=S ) } else { if((fm=="minres")| (fm=="uls")) { #which is actually the same as OLS but we use the gradient start <- diag(S)- smc(S,covar) #added 9/11/18 ## is this correct, or backward? res <- optim(start, fit.residuals,gr=FAgr.minres, method = "L-BFGS-B", lower = .005, upper = upper, control = c(list(fnscale = 1, parscale = rep(0.01, length(start)))), nf= nf, S=S,fm=fm) } else { #this is the case of old.min start <- smc(S,covar) #added 9/11/18 ##but why is this not diag(S)-smc(S,covar) res <- optim(start, fit.residuals,gr=FAgr.minres2, method = "L-BFGS-B", lower = .005, upper = upper, control = c(list(fnscale = 1, parscale = rep(0.01, length(start)))), nf= nf, S=S, S.inv=S.inv,fm=fm ) } } } if((fm=="wls") | (fm=="gls") | (fm =="ols") | (fm =="uls")| (fm=="minres") | (fm=="old.min")) {Lambda <- FAout.wls(res$par, S, nf)} else { Lambda <- FAout(res$par, S, nf)} result <- list(loadings=Lambda,res=res,S=S) } ## the next two functions are taken directly from the factanal function in order to include maximum likelihood as one of the estimation procedures FAfn <- function(Psi, S, nf) { sc <- diag(1/sqrt(Psi)) Sstar <- sc %*% S %*% sc E <- eigen(Sstar, symmetric = TRUE, only.values = TRUE) e <- E$values[-(1:nf)] e <- sum(log(e) - e) - nf + nrow(S) -e } FAgr <- function(Psi, S, nf) #the first derivatives { sc <- diag(1/sqrt(Psi)) Sstar <- sc %*% S %*% sc E <- eigen(Sstar, symmetric = TRUE) L <- E$vectors[, 1:nf, drop = FALSE] load <- L %*% diag(sqrt(pmax(E$values[1:nf] - 1, 0)), nf) load <- diag(sqrt(Psi)) %*% load g <- load %*% t(load) + diag(Psi) - S # g <- model - data diag(g)/Psi^2 #normalized } # FAgr.minres.old <- function(Psi, S, nf,S.inv,fm) #the first derivatives -- no longer used # { sc <- diag(1/sqrt(Psi)) # Sstar <- sc %*% S %*% sc # E <- eigen(Sstar, symmetric = TRUE) # L <- E$vectors[, 1:nf, drop = FALSE] # load <- L %*% diag(sqrt(pmax(E$values[1:nf] - 1, 0)), nf) # load <- diag(sqrt(Psi)) %*% load # model <- load %*% t(load) # g <- diag(Psi) - diag(S -model) # g <- model - data # if(fm=="minchi") {g <- g*np.obs} # diag(g)/Psi^2 #normalized # } FAgr.minres2 <- function(Psi, S, nf,S.inv,fm) #the first derivatives used by old.min { sc <- diag(1/sqrt(Psi)) Sstar <- sc %*% S %*% sc E <- eigen(Sstar, symmetric = TRUE) L <- E$vectors[, 1:nf, drop = FALSE] load <- L %*% diag(sqrt(pmax(E$values[1:nf]-1 , 0)), nf) load <- diag(sqrt(Psi)) %*% load g <- load %*% t(load) + diag(Psi) - S # g <- model - data if(fm=="minchi") {g <- g*np.obs} #normalized diag(g)/Psi^2 } FAgr.minres <- function(Psi, S, nf,fm) #the first derivatives used by minres { Sstar <- S - diag(Psi) E <- eigen(Sstar, symmetric = TRUE) L <- E$vectors[, 1:nf, drop = FALSE] load <- L %*% diag(sqrt(pmax(E$values[1:nf] , 0)), nf) # load <- diag(sqrt(Psi)) %*% load g <- load %*% t(load) + diag(Psi) - S # g <- model - data #if(fm=="minchi") {g <- g*np.obs} #normalized diag(g) } #this was also taken from factanal FAout <- function(Psi, S, q) { sc <- diag(1/sqrt(Psi)) Sstar <- sc %*% S %*% sc E <- eigen(Sstar, symmetric = TRUE) L <- E$vectors[, 1L:q, drop = FALSE] load <- L %*% diag(sqrt(pmax(E$values[1L:q] - 1, 0)), q) diag(sqrt(Psi)) %*% load } #This is modified from factanal -- the difference in the loadings is that these produce orthogonal loadings, but slightly worse fit FAout.wls <- function(Psi, S, q) { diag(S) <- diag(S)- Psi # added diag(S) - Psi instead of 1- Psi to handle covar=TRUE 9/11/18 E <- eigen(S,symmetric = TRUE) # L <- E$vectors[,1L:q,drop=FALSE] %*% diag(sqrt(E$values[1L:q,drop=FALSE]),q) L <- E$vectors[,1L:q,drop=FALSE] %*% diag(sqrt(pmax(E$values[1L:q,drop=FALSE],0)),q) #added the > 0 test August 30, 2017 return(L) } #this takes advantage of the glb.algebraic function to do min.rank factor analysis "MRFA" <- function(S,nf) { com.glb <- glb.algebraic(S) L <- FAout.wls(1-com.glb$solution,S,nf) h2 <- com.glb$solution result <- list(loadings =L, communality = h2) } #The next function was adapted by WR from a suggestion by Hao Wu (April 12, 2017) FA.OLS <- function(Psi,S,nf) { E <- eigen(S-diag(Psi),symmetric=T) U <-E$vectors[,1:nf,drop=FALSE] D <- E$values[1:nf,drop=FALSE] D [D < 0] <- 0 if(length(D) < 2) {L <- U * sqrt(D)} else { L <- U %*% diag(sqrt(D))} #gets around a weird problem for nf=1 model <- L %*% t(L) diag(model) <- diag(S) return(sum((S-model)^2)/2) } ##The gradient function speeds up the function drastically but is incorrect and is not used FAgr.OLS <- function(Psi, S, nf) #the first derivatives -- seems bad { E <- eigen(S-diag(Psi), symmetric = TRUE) U <- E$vectors[, 1:nf, drop = FALSE] D <- E$values[1:nf] D [D < 0] <-0 L <- U %*% diag(sqrt(D)) model <- L %*% t(L) g <- diag(Psi) - diag(S -model) # g <- model - data diag(g)/Psi^2 #(diag(g) - Psi)/Psi } ############################### ############################## # These functions are now commented out, used to test fa # #now test this # test.ols <- function(R,nf) { #this does not agree with Hao Wu -- something is wrong with the gradient # start <- diag(R)- smc(R) # res <- optim(start, FA.OLS,gr=FAgr.OLS, method = "L-BFGS-B", lower = .005, # upper = 1, control = c(list(fnscale = 1, parscale = rep(0.01, # length(start)))), nf= nf, S=R) # Lambda <- FAout.wls(res$par, R, nf) # } # # test.ols <- function(R,nf) { #this agrees with the Hao solution-- not using a gradient # start <- diag(R)- smc(R) # res <- optim(start, FA.OLS, method = "L-BFGS-B", lower = .005, # upper = 1, control = c(list(fnscale = 1, parscale = rep(0.01, # length(start)))), nf= nf, S=R ) # Lambda1 <- FAout.wls(res$par, R, nf) # # } ########## #the following two functions, sent by Hao Wu have been used for benchmarking, but are not used in fa. ##Now I define a function to minimize the FOLS function above w.r.t the unique variances. It returns the standardized loadings, raw loadings, unique variances, OLS function value and convergence status (0= convergence). #the Hao Wu functions (although not used, they are included here for completeness # FOLS<-function(Psi,S,fac){ # eig<-eigen(S-diag(Psi),symmetric=T); # U<-eig$vectors[,1:fac]; # D<-eig$values[1:fac]; # D[D<0]<-0; # L<-U%*%diag(sqrt(D),fac,fac); # Omega<-L%*%t(L); # diag(Omega)<-diag(S); # return(sum((S-Omega)^2)/2); # } # # EFAOLS2 <- function(S,Psi0=1/diag(chol2inv(chol(S))),fac) { # efa <- nlminb(Psi0,FOLS,lower=0,S=S,fac=fac) # fit.OLS<-efa$objective # fit.Psi<-efa$par # eig<-eigen(S-diag(fit.Psi),symmetric=T) # U<-eig$vectors[,1:fac] # D<-eig$values[1:fac] # D [D<0] <-0 # fit.L<-U%*%diag(sqrt(D),fac,fac) # return(list(st.L=diag(1/diag(S))%*%fit.L,L=fit.L,Psi=fit.Psi,F=fit.OLS,convergence=efa$convergence)) # } # ############################## ## now start the main function #np.obs <- NULL #only returned with a value in case of fm="minchi" if (fm == "mle" || fm =="MLE" || fm == "ML" ) fm <- "ml" #to correct any confusion if (!any(fm %in%(c("pa","alpha", "minrank","wls","gls","minres","minchi", "uls","ml","mle","ols" ,"old.min") ))) {message("factor method not specified correctly, minimum residual (unweighted least squares used") fm <- "minres" } x.matrix <- r n <- dim(r)[2] if (!isCorrelation(r) & !isCovariance(r)) { matrix.input <- FALSE #return the correlation matrix in this case n.obs <- dim(r)[1] #Added the test for none-symmetric in case we have a covariance matrix 4/10/19 if(missing) { #impute values x.matrix <- as.matrix(x.matrix) #the trick for replacing missing works only on matrices miss <- which(is.na(x.matrix),arr.ind=TRUE) if(impute=="mean") { item.means <- colMeans(x.matrix,na.rm=TRUE) #replace missing values with means x.matrix[miss]<- item.means[miss[,2]]} else { item.med <- apply(x.matrix,2,median,na.rm=TRUE) #replace missing with medians x.matrix[miss]<- item.med[miss[,2]]} } #if(fm=="minchi") np.obs <- pairwiseCount(r) #used if we want to do sample size weighting if(covar) {cor <- "cov"} # if given a rectangular matrix, then find the correlation or covariance #multiple ways of find correlations or covariances #added the weights option to tet, poly, tetrachoric, and polychoric June 27, 2018 switch(cor, cor = {if(!is.null(weight)) {r <- cor.wt(r,w=weight)$r} else { r <- cor(r,use=use)} }, cov = {r <- cov(r,use=use) covar <- TRUE}, wtd = { r <- cor.wt(r,w=weight)$r}, spearman = {r <- cor(r,use=use,method="spearman")}, kendall = {r <- cor(r,use=use,method="kendall")}, tet = {r <- tetrachoric(r,correct=correct,weight=weight)$rho}, poly = {r <- polychoric(r,correct=correct,weight=weight)$rho}, tetrachoric = {r <- tetrachoric(r,correct=correct,weight=weight)$rho}, polychoric = {r <- polychoric(r,correct=correct,weight=weight)$rho}, mixed = {r <- mixedCor(r,use=use,correct=correct)$rho}, Yuleb = {r <- YuleCor(r,,bonett=TRUE)$rho}, YuleQ = {r <- YuleCor(r,1)$rho}, YuleY = {r <- YuleCor(r,.5)$rho } ) } else { matrix.input <- TRUE #don't return the correlation matrix if(fm=="minchi") { if(is.null(np.obs)) {fm <- "minres" message("factor method minchi does not make sense unless we know the sample size, minres used instead") } } if(is.na(n.obs) && !is.null(np.obs)) n.obs <- max(as.vector(np.obs)) if(!is.matrix(r)) { r <- as.matrix(r)} if(!covar) { r <- cov2cor(r) #probably better to do it this way (11/22/2010) #sds <- sqrt(diag(r)) #convert covariance matrices to correlation matrices # r <- r/(sds %o% sds) #if we remove this, then we need to fix the communality estimates } } #added June 9, 2008 #does this next line actually do anything? if (!residuals) { result <- list(values=c(rep(0,n)),rotation=rotate,n.obs=n.obs,np.obs=np.obs,communality=c(rep(0,n)),loadings=matrix(rep(0,n*n),ncol=n),fit=0)} else { result <- list(values=c(rep(0,n)),rotation=rotate,n.obs=n.obs,np.obs=np.obs,communality=c(rep(0,n)),loadings=matrix(rep(0,n*n),ncol=n),residual=matrix(rep(0,n*n),ncol=n),fit=0,r=r)} if(is.null(SMC)) SMC=TRUE #if we don't specify it, make it true r.mat <- r Phi <- NULL colnames(r.mat) <- rownames(r.mat) <- colnames(r) if(any(is.na(r))) { bad <- TRUE tempr <-r wcl <-NULL while(bad) { wc <- table(which(is.na(tempr), arr.ind=TRUE)) #find the correlations that are NA wcl <- c(wcl,as.numeric(names(which(wc==max(wc))))) tempr <- r[-wcl,-wcl] if(any(is.na(tempr))) {bad <- TRUE} else {bad <- FALSE} } cat('\nLikely variables with missing values are ',colnames(r)[wcl],' \n') stop("I am sorry: missing values (NAs) in the correlation matrix do not allow me to continue.\nPlease drop those variables and try again." ) } if(is.logical(SMC) ) { if(SMC) {if(nfactors <= n) {#changed to <= n instead of < n/2 This warning seems to confuse people unnecessarily diag(r.mat) <- smc(r,covar=covar) } else {if (warnings) { message("In fa, too many factors requested for this number of variables to use SMC for communality estimates, 1s are used instead")} } } else { diag(r.mat) <- 1 } } else { diag(r.mat) <- SMC} orig <- diag(r) comm <- sum(diag(r.mat)) err <- comm i <- 1 comm.list <- list() #principal axis is an iterative eigen value fitting if(fm =="alpha") { #alpha factor analysis iteratively replaces the diagonal with revised communalities, and then rescales the matrix i <- 1 #count the iterations e.values <- eigen(r,symmetric=symmetric)$values #store the original solution H2 <- diag(r.mat) #the original communality estimate while(err > min.err) { #iteratively replace the diagonal with our revised communality estimate r.mat <- cov2cor(r.mat) #this has rescaled the correlations based upon the communalities eigens <- eigen(r.mat,symmetric=symmetric) loadings <- eigens$vectors[,1:nfactors,drop=FALSE] %*% diag(sqrt(eigens$values[1:nfactors,drop=FALSE])) model <- loadings %*% t(loadings) newH2 <- H2 * diag(model) err <- sum(abs(H2 - newH2)) r.mat <- r diag(r.mat) <- newH2 H2 <- newH2 i <- i + 1 if(i > max.iter) { if(warnings) {message("maximum iteration exceeded")} err <-0 } } #end of while loop loadings <- sqrt(H2) * loadings eigens <- sqrt(H2) * eigens$values #fixed 7/4/22 thanks to P. Doebler comm1 <- sum(H2) } #end alpha factor analysis if(fm=="pa") { e.values <- eigen(r,symmetric=symmetric)$values #store the original solution while(err > min.err) #iteratively replace the diagonal with our revised communality estimate { eigens <- eigen(r.mat,symmetric=symmetric) if(nfactors >1 ) {loadings <- eigens$vectors[,1:nfactors] %*% diag(sqrt(eigens$values[1:nfactors])) } else {loadings <- eigens$vectors[,1] * sqrt(eigens$values[1] ) } model <- loadings %*% t(loadings) new <- diag(model) comm1 <- sum(new) diag(r.mat) <- new err <- abs(comm-comm1) if(is.na(err)) {warning("imaginary eigen value condition encountered in fa\n Try again with SMC=FALSE \n exiting fa") break} comm <- comm1 comm.list[[i]] <- comm1 i <- i + 1 if(i > max.iter) { if(warnings) {message("maximum iteration exceeded")} err <-0 } } #end of while loop eigens <- eigens$values } if (fm=="minrank") {mrfa <- MRFA(r,nfactors) loadings <- mrfa$loadings model <- loadings %*% t(loadings) e.values <- eigen(r)$values S <- r diag(S) <- diag(model) eigens <- eigen(S)$values } if((fm == "wls") | (fm=="minres") |(fm=="minchi") | (fm=="gls") | (fm=="uls")|(fm== "ml")|(fm== "mle") | (fm=="ols") | (fm=="old.min")) { uls <- fit(r,nfactors,fm,covar=covar) e.values <- eigen(r)$values #eigen values of pc: used for the summary stats -- result.res <- uls$res loadings <- uls$loadings model <- loadings %*% t(loadings) S <- r diag(S) <- diag(model) #communalities from the factor model eigens <- eigen(S)$values } # a weird condition that happens with poor data #making the matrix symmetric solves this problem if(!is.double(loadings)) {warning('the matrix has produced imaginary results -- proceed with caution') loadings <- matrix(as.double(loadings),ncol=nfactors) } #make each vector signed so that the maximum loading is positive - should do after rotation #Alternatively, flip to make the colSums of loading positive if (nfactors >1) {sign.tot <- vector(mode="numeric",length=nfactors) sign.tot <- sign(colSums(loadings)) sign.tot[sign.tot==0] <- 1 loadings <- loadings %*% diag(sign.tot) } else { if (sum(loadings) < 0) {loadings <- -as.matrix(loadings)} else {loadings <- as.matrix(loadings)} colnames(loadings) <- "MR1" } switch(fm, alpha = {colnames(loadings) <- paste("alpha",1:nfactors,sep='')}, wls={colnames(loadings) <- paste("WLS",1:nfactors,sep='') }, pa= {colnames(loadings) <- paste("PA",1:nfactors,sep='')} , gls = {colnames(loadings) <- paste("GLS",1:nfactors,sep='')}, ml = {colnames(loadings) <- paste("ML",1:nfactors,sep='')}, minres = {colnames(loadings) <- paste("MR",1:nfactors,sep='')}, minrank = {colnames(loadings) <- paste("MRFA",1:nfactors,sep='')}, minchi = {colnames(loadings) <- paste("MC",1:nfactors,sep='')} ) rownames(loadings) <- rownames(r) loadings[loadings==0.0] <- 10^-15 #added to stop a problem with varimax if loadings are exactly 0 model <- loadings %*% t(loadings) f.loadings <- loadings #used to pass them to factor.stats rot.mat <- NULL rotated <- NULL if(rotate != "none") {if (nfactors > 1) { if(n.rotations > 1) {rotated <- faRotations(loadings,r=r,n.rotations=n.rotations,rotate=rotate,hyper=hyper) loadings=rotated$loadings Phi<- rotated$Phi rot.mat=rotated$rot.mat } else { rotated <- NULL #just do one rotation if (rotate=="varimax" |rotate=="Varimax" | rotate=="quartimax" | rotate =="bentlerT" | rotate =="geominT" | rotate =="targetT" | rotate =="bifactor" | rotate =="TargetT"| rotate =="equamax"| rotate =="varimin"|rotate =="specialT" | rotate =="Promax" | rotate =="promax"| rotate =="cluster" |rotate == "biquartimin" |rotate == "TargetQ" |rotate =="specialQ" ) { Phi <- NULL switch(rotate, #The orthogonal cases for GPArotation + ones developed for psych varimax = {rotated <- stats::varimax(loadings) #varimax is from stats, the others are from GPArotation loadings <- rotated$loadings rot.mat <- rotated$rotmat}, Varimax = {if (!requireNamespace('GPArotation')) {stop("I am sorry, to do this rotation requires the GPArotation package to be installed")} #varimax is from the stats package, Varimax is from GPArotations #rotated <- do.call(rotate,list(loadings,...)) #rotated <- do.call(getFromNamespace(rotate,'GPArotation'),list(loadings,...)) rotated <- GPArotation::Varimax(loadings,...) loadings <- rotated$loadings rot.mat <- t(solve(rotated$Th))} , quartimax = {if (!requireNamespace('GPArotation')) {stop("I am sorry, to do this rotation requires the GPArotation package to be installed")} #rotated <- do.call(rotate,list(loadings)) rotated <- GPArotation::quartimax(loadings,...) loadings <- rotated$loadings rot.mat <- t(solve(rotated$Th))} , bentlerT = {if (!requireNamespace('GPArotation')) {stop("I am sorry, to do this rotation requires the GPArotation package to be installed")} #rotated <- do.call(rotate,list(loadings,...)) rotated <- GPArotation::bentlerT(loadings,...) loadings <- rotated$loadings rot.mat <- t(solve(rotated$Th))} , geominT = {if (!requireNamespace('GPArotation')) {stop("I am sorry, to do this rotation requires the GPArotation package to be installed")} #rotated <- do.call(rotate,list(loadings,...)) rotated <- GPArotation::geominT(loadings,...) loadings <- rotated$loadings rot.mat <- t(solve(rotated$Th))} , targetT = {if (!requireNamespace('GPArotation')) {stop("I am sorry, to do this rotation requires the GPArotation package to be installed")} rotated <- GPArotation::targetT(loadings,Tmat=diag(ncol(loadings)),...) loadings <- rotated$loadings rot.mat <- t(solve(rotated$Th))} , bifactor = {rot <- bifactor(loadings,...) loadings <- rot$loadings rot.mat <- t(solve(rot$Th))}, TargetT = {if (!requireNamespace('GPArotation')) {stop("I am sorry, to do this rotation requires the GPArotation package to be installed")} rot <- GPArotation::targetT(loadings,Tmat=diag(ncol(loadings)),...) loadings <- rot$loadings rot.mat <- t(solve(rot$Th))}, equamax = {rot <- equamax(loadings,...) loadings <- rot$loadings rot.mat <- t(solve(rot$Th))}, varimin = {rot <- varimin(loadings,...) loadings <- rot$loadings rot.mat <- t(solve(rot$Th))}, specialT = {rot <- specialT(loadings,...) loadings <- rot$loadings rot.mat <- t(solve(rot$Th))}, Promax = {pro <- Promax(loadings,...) #Promax without Kaiser normalization loadings <- pro$loadings Phi <- pro$Phi rot.mat <- pro$rotmat}, promax = {#pro <- stats::promax(loadings,...) #from stats pro <- kaiser(loadings,rotate="Promax",...) #calling promax will now do the Kaiser normalization before doing Promax rotation loadings <- pro$loadings rot.mat <- pro$rotmat # ui <- solve(rot.mat) # Phi <- cov2cor(ui %*% t(ui)) Phi <- pro$Phi }, cluster = {loadings <- varimax(loadings,...)$loadings pro <- target.rot(loadings) loadings <- pro$loadings Phi <- pro$Phi rot.mat <- pro$rotmat}, biquartimin = {ob <- biquartimin(loadings,...) loadings <- ob$loadings Phi <- ob$Phi rot.mat <- t(solve(ob$Th))}, TargetQ = {ob <- TargetQ(loadings,...) loadings <- ob$loadings Phi <- ob$Phi rot.mat <- t(solve(ob$Th))}, specialQ = {ob <- specialQ(loadings,...) loadings <- ob$loadings Phi <- ob$Phi rot.mat <- t(solve(pro$Th))}) } else { #The following oblique cases all use GPArotation if (rotate =="oblimin"| rotate=="quartimin" | rotate== "simplimax" | rotate =="geominQ" | rotate =="bentlerQ" |rotate == "targetQ" ) { if (!requireNamespace('GPArotation')) {warning("I am sorry, to do these rotations requires the GPArotation package to be installed") Phi <- NULL} else { ob <- try(do.call(getFromNamespace(rotate,'GPArotation'),list(loadings,...))) if(inherits(ob,as.character("try-error"))) {warning("The requested transformaton failed, Promax was used instead as an oblique transformation") ob <- Promax(loadings)} loadings <- ob$loadings Phi <- ob$Phi rot.mat <- t(solve(ob$Th))} } else {message("Specified rotation not found, rotate='none' used")} } } } } else {rotated <- NULL } signed <- sign(colSums(loadings)) signed[signed==0] <- 1 loadings <- loadings %*% diag(signed) #flips factors to be in positive direction but loses the colnames if(!is.null(Phi)) {Phi <- diag(signed) %*% Phi %*% diag(signed) } #added October 20, 2009 to correct bug found by Erich Studerus switch(fm, alpha = {colnames(loadings) <- paste("alpha",1:nfactors,sep='')}, wls={colnames(loadings) <- paste("WLS",1:nfactors,sep='') }, pa= {colnames(loadings) <- paste("PA",1:nfactors,sep='')} , gls = {colnames(loadings) <- paste("GLS",1:nfactors,sep='')}, ml = {colnames(loadings) <- paste("ML",1:nfactors,sep='')}, minres = {colnames(loadings) <- paste("MR",1:nfactors,sep='')}, minrank = {colnames(loadings) <- paste("MRFA",1:nfactors,sep='')}, uls = {colnames(loadings) <- paste("ULS",1:nfactors,sep='')}, old.min = {colnames(loadings) <- paste0("oldmin",1:nfactors)}, minchi = {colnames(loadings) <- paste("MC",1:nfactors,sep='')}) #just in case the rotation changes the order of the factors, sort them #added October 30, 2008 if(nfactors >1) { if(is.null(Phi)) {ev.rotated <- diag(t(loadings) %*% loadings)} else { ev.rotated <- diag( Phi %*% t(loadings) %*% loadings)} # probably should include Phi in this operation 3/10/22 ev.order <- order(ev.rotated,decreasing=TRUE) loadings <- loadings[,ev.order]} rownames(loadings) <- colnames(r) if(!is.null(Phi)) {Phi <- Phi[ev.order,ev.order] } #January 20, 2009 but, then, we also need to change the order of the rotation matrix! class(loadings) <- "loadings" if(nfactors < 1) nfactors <- n # if(max(abs(loadings) > 1.0) && !covar) warning(' A loading greater than abs(1) was detected. Examine the loadings carefully.') result <- factor.stats(r,loadings,Phi,n.obs=n.obs,np.obs=np.obs,alpha=alpha,smooth=smooth) #do stats as a subroutine common to several functions result$rotation <- rotate if(nfactors != 1) {result$hyperplane <- colSums(abs(loadings )< hyper)} else {result$hyperplane <- sum(abs(loadings) < hyper)} result$communality <- diag(model) if(max(result$communality > 1.0) && !covar) warning("An ultra-Heywood case was detected. Examine the results carefully") if(fm == "minrank") {result$communalities <- mrfa$communality} else {if(fm=="pa" | fm == "alpha") {result$communalities <- comm1} else {result$communalities <- 1- result.res$par}} result$uniquenesses <- diag(r-model) result$values <- eigens result$e.values <- e.values result$loadings <- loadings result$model <- model #diag(result$model) <- diag(r) result$fm <- fm #remember what kind of analysis we did result$rot.mat <- rot.mat if(!is.null(Phi) ) {colnames(Phi) <- rownames(Phi) <- colnames(loadings) #added 2/14/16 to help with fa.multi result$Phi <- Phi #the if statement was incorrectly including oblique.scores. Fixed Feb, 2012 following a report by Jessica Jaynes Structure <- loadings %*% Phi} else {Structure <- loadings} class(Structure) <- "loadings" result$Structure <- Structure #added December 12, 2011 if(fm == "pa") result$communality.iterations <- unlist(comm.list) #Some of the Grice equations use the pattern matrix, but some use the structure matrix #we are now dropping this oblique score option (9/2/17) result$method=scores #this is the chosen method for factor scores if(oblique.scores) {result$scores <- factor.scores(x.matrix,f=loadings,Phi=NULL,method=scores, missing=missing,impute=impute) } else {result$scores <- factor.scores(x.matrix,f=loadings,Phi=Phi,method=scores, missing=missing, impute=impute)} #added missing and impute 9/5/22 if(is.null( result$scores$R2)) result$scores$R2 <- NA result$R2.scores <- result$scores$R2 result$weights <- result$scores$weights #these are the weights found in factor scores and will be different from the ones reported by factor.stats result$scores <- result$scores$scores if(!is.null(result$scores)) colnames(result$scores) <- colnames(loadings) #added Sept 27, 2013 result$factors <- nfactors result$r <- r #save the correlation matrix result$np.obs <- np.obs result$fn <- "fa" result$fm <- fm #Find the summary statistics of Variance accounted for #normally just found in the print function (added 4/22/17) #from the print function if(is.null(Phi)) {if(nfactors > 1) {vx <- colSums(loadings^2) } else {vx <- sum(loadings^2) }} else {vx <- diag(Phi %*% t(loadings) %*% loadings) } vtotal <- sum(result$communality + result$uniquenesses) names(vx) <- colnames(loadings) varex <- rbind("SS loadings" = vx) varex <- rbind(varex, "Proportion Var" = vx/vtotal) if (nfactors > 1) { varex <- rbind(varex, "Cumulative Var"= cumsum(vx/vtotal)) varex <- rbind(varex, "Proportion Explained"= vx/sum(vx)) varex <- rbind(varex, "Cumulative Proportion"= cumsum(vx/sum(vx))) } result$rotated <- rotated$rotation.stats result$Vaccounted <- varex result$Call <- cl class(result) <- c("psych", "fa") return(result) } #modified October 30, 2008 to sort the rotated loadings matrix by the eigen values. #modified Spring, 2009 to add multiple ways of doing factor analysis #corrected, August, 2009 to count the diagonal when doing GLS or WLS - this mainly affects (improves) the chi square #modified April 4, 2011 to find the factor scores of the oblique factors #modified December 12, 2011 to report structure coefficients as well as pattern (loadings) #modified February 11, 2013 to correctly treat SMC=FALSE as 1s instead of 0s. #modified spring, 2015 to use switch in the rotation options #modified August 25, 2015 to add rot.mat as output #modified February 22, 2016 to keep the diagonal of the model as it should be -- e.g., the communalities #December 23, 2019 changed the call to mixed.cor to be mixedCor.psych/R/anova.psych.R0000644000176200001440000001154714526774627014211 0ustar liggesusers#A function to report the difference between two factor models #adapted from John Fox's sem anova #modified November 29, 2019 to include anovas for setCor and mediate models anova.psych <- function(object,...) { #if(length(class(object)) > 1) { value <- class(object)[2] } else {value <- NA} if(length(class(object)) > 1) { names <- cs(omega,fa, lmCor,mediate) value <- inherits(object,names,which=TRUE) # value <- class(x)[2] if(any(value > 1) ) { value <- names[which(value > 0)]} else {value <- "other"} } else {value <- "other"} #this does the work for lmCors and mediate or any model that returns SSR and dfs small.function <- function(models,dfs,SSR,nvar=1) { #this next section is adapted from anova.lm and anova.lmlist if(nvar==1) { n.mod <- length(models) mods <- unlist(models) for(i in 1:n.mod) { temp <- unlist(mods[[i]]) cat("Model",i, "= ") print(temp,rownames=FALSE) } } table <- data.frame(df=unlist(dfs),SSR=unlist(SSR)) MSR <- table$SSR/table$df df <- table$df diffSS <- -diff(table$SSR) diffdf <- -diff(table$df) #find the model with the most df biggest.df <- order(table$df)[1] scale <- table[biggest.df,"SSR"]/table[biggest.df,"df"] F <- (diffSS) /diffdf/scale prob <- pf(F,abs(diffdf),df[-1],lower.tail=FALSE) table <- data.frame(table,diff.df=c(NA,diffdf),diff.SS= c(NA,diffSS),F= c(NA,F),c(NA,prob)) names(table) <- c("Res Df","Res SS", "Diff df","Diff SS","F","Pr(F > )") return(table)} #and this does the work for fa and omega another.function <- function(models,dfs,echis,chi,BICS) { mods <- unlist(models) n.mod <- length(models) mods <- unlist(models) for(i in 1:n.mod) { temp <- unlist(mods[[i]]) cat("Model",i, "= ") print(temp,rownames=FALSE) } delta.df <- -diff(unlist(dfs)) delta.chi <- -diff(unlist(chi)) if(!is.null(echis) ) {delta.echi <- -diff(unlist(echis))} else {delta.echi <- NA} delta.bic <- diff(unlist(BICS)) test.chi <- delta.chi/delta.df test.echi <- delta.echi/delta.df p.delta <- pchisq(delta.chi, delta.df, lower.tail=FALSE) if(!is.null(echis) ){ table <- data.frame(df=unlist(dfs),d.df=c(NA,delta.df),chiSq=unlist(chi), d.chiSq=c(NA,delta.chi), PR=c(NA,p.delta),test=c(NA,test.chi), empirical = unlist(echis),d.empirical=c(NA,delta.echi),test.echi=c(NA,test.echi),BIC=unlist(BICS),d.BIC = c(NA,delta.bic))} else { table <- data.frame(df=unlist(dfs),d.df=c(NA,delta.df),chiSq=unlist(chi), d.chiSq=c(NA,delta.chi), PR=c(NA,p.delta),test=c(NA,test.chi),BIC=unlist(BICS),d.BIC = c(NA,delta.bic))} table <- round(table,2) return(table) } switch(value, mediate ={ if (length(list(object, ...)) > 1L) { objects <- list(object,...) dfs <- lapply(objects, function(x) x$cprime.reg$df) SSR <- lapply(objects, function(x) x$cprime.reg$SE.resid^2 * x$cprime.reg$df) models <- lapply(objects, function(x) x$Call) table <- small.function(models=models,dfs=dfs,SSR=SSR) } }, lmCor ={ if (length(list(object, ...)) > 1L) { objects <- list(object,...) dfs <- lapply(objects, function(x) x$df[2]) SSR <- lapply(objects, function(x) x$SE.resid^2 * x$df[2]) models <- lapply(objects, function(x) x$Call) if (length(SSR) ==1) {table <- small.function(models=models,dfs=dfs,SSR=SSR)} else { table <- list() nvar <- length(SSR[[1]]) ssrm <- matrix(unlist(SSR),ncol=nvar,byrow=TRUE) for (i in (1:nvar)) { table[[names(SSR[[1]][i])]] <- small.function(models=models,dfs=dfs,SSR=ssrm[,i],nvar=i) } } } return(table) }, fa = { if (length(list(object, ...)) > 1L) { objects <- list(object,...) n.models <- length(objects) echis <- lapply(objects,function(x) x$chi) BICS <- lapply(objects,function(x) x$BIC) dofs <- lapply(objects,function(x) x$dof) chi <- lapply(objects,function(x) x$STATISTIC) models <- lapply(objects, function(x) x$Call) nechi <- length (echis) nBics <- length(BICS) nchi <- length(chi) if(nechi != n.models) {stop("You do not seem to have chi square values for one of the models ")} if(nchi != n.models) {stop("You do not seem to have chi square values for one of the models ")} table <- another.function(models,dfs=dofs,echis=echis,chi = chi,BICS = BICS) } }, omega = { #should change this to include more than 2 models (see above ) if (length(list(object, ...)) > 1L) { objects <- list(object,...) n.models <- length(objects) # echis <- lapply(objects,function(x) x$schmid$chi) BICS <- lapply(objects,function(x) x$schmid$BIC) dofs <- lapply(objects,function(x) x$schmid$dof) chi <- lapply(objects,function(x) x$schmid$STATISTIC) models <- lapply(objects, function(x) x$Call) # nechi <- length (echis) nBics <- length(BICS) nchi <- length(chi) table <- another.function(models,dfs=dofs,echis=NULL,chi = chi,BICS = BICS) } } ) return(table) } psych/R/faBy.R0000644000176200001440000001505714207713374012625 0ustar liggesusers"faBy" <- function(stats,nfactors=1,rotate="oblimin",fm="minres",free=TRUE,all=FALSE,min.n=12,quant=.1,...) { if(!inherits(stats, "statsBy")) stop("Please run statsBy first") cl <- match.call() fo.orth <- fa(stats$pooled,nfactors=nfactors,rotate="none",fm=fm) #get the overall pooled structure fo.rotated <- fa(stats$pooled,nfactors=nfactors,rotate=rotate,fm=fm,...) #could replace with a call to #fo.rotated <- faRotate(fo.orth,rotate=rotate,...) # fl <- fo.rotated$loadings fl <- fo.rotated$loadings f <- list() #hold the results of each fa for each group ngroups <- stats$nG nvar <- ncol(stats$r[[1]]) #replicateslist <- mclapply(1:ngroups,function(x) { stats$r <- pickgood(stats,min.n=min.n) #get the good analyses replicateslist <- lapply(stats$r,function(X,...) { if(!is.null(X) ){ if(!free && (nfactors > 1)) { fs <- try(fac(X,nfactors=nfactors,rotate="none",scores="none",...)) #call fa but do not rotate #First match the orthogonal factors to get the right order and directions #then target rotate the subject solution to the oblique pooled solution #then match the order and directions (fixing the correlations) of this new solution fs$loadings <- faMatch(fo.orth,fs)$loadings #first match them and then target rotate to overall fs<- TargetQ(fs$loadings,Target=list(fl)) fs <- faMatch(fl,fs) #try to match them but don't force a rotation } else { fs <- try(fac(X,nfactors=nfactors,rotate=rotate,scores="none",...)) #call fa with the appropriate parameters if(nfactors > 1) fs$loadings <- faMatch(fl,fs)$loadings #try to match them but don't force a rotation } #if( length(class(fs)) ==1 ) {warning("could not factor a within subject matrix")} else { # if(!free && (nfactors > 1)) { else { if(!is.null(fs$Phi)) { phis <- fs$Phi if(all) { replicates <- list(fa=fs,loadings=(fs$loadings),phis=phis,vloadings = as.vector(fs$loadings),vphis = phis[lower.tri(phis)])} else { replicates <- list(loadings=fs$loadings,phis=phis,vloadings = as.vector(fs$loadings),vphis = phis[lower.tri(phis)])} } else {replicates <- list(loadings=fs$loadings,vloadings = as.vector(fs$loadings))} } } ) fabygroups <- lapply(replicateslist,function(X) X$vloadings) notnullgroup <- unlist(lapply(fabygroups,function(x) !is.null(x))) namesbygroup <- names(fabygroups)[notnullgroup] fabygroups <- matrix(unlist(lapply(replicateslist,function(X) X$vloadings)),ncol=nvar*nfactors,byrow=TRUE) num.groups <- nrow(fabygroups) means <- colMeans(fabygroups,na.rm=TRUE) sds <- apply(fabygroups,2,sd,na.rm=TRUE) quants.low <- apply(fabygroups,2,quantile,quant) quants.high<- apply(fabygroups,2,quantile,1-quant) fnames<- colnames(fo.rotated$loadings)[1:nfactors] vnames <- rownames(fo.rotated$loadings) faby.sum <- matrix(c(as.vector(fl),means,sds,quants.low,quants.high),ncol=5) colnames(faby.sum) <-c("Pooled","mean","sd","low","high") rownames(faby.sum) <- paste(rep(vnames,nfactors)) faby <- t(fabygroups) colnames(faby) <- c(paste0("gr-",namesbygroup)) rownames(faby) <- paste(rep(vnames,nfactors),"-",rep(fnames,each=nvar)) if(!is.null(fo.rotated$Phi)) { vphis <- matrix(unlist(lapply(replicateslist,function(X) X$vphis)),nrow=num.groups,byrow=TRUE) means.phis <- colMeans(vphis) sds.phis <- apply(vphis,2,sd,na.rm=TRUE) phis.low <- apply(vphis,2,quantile,quant) phis.high <- apply(vphis,2,quantile,1-quant) phiby.sum <- matrix(c(fo.rotated$Phi[lower.tri(fo.rotated$Phi)],means.phis,sds.phis,phis.low,phis.high),ncol=5) phiby <- (matrix(c(fo.rotated$Phi[lower.tri(fo.rotated$Phi)],means.phis,sds.phis,phis.low,phis.high,t(vphis)), ncol=(num.groups+5),byrow=FALSE)) colnames(phiby) <- c("Total","Mean","sd","low","high", paste0("gr-",namesbygroup)) rownames(phiby) <-1:(nfactors*(nfactors-1)/2) k <- 1 for (fi in 1:(nfactors-1)) { for (fj in (fi+1):(nfactors)) {rownames(phiby)[k] <- paste(fnames[fi],"-",fnames[fj],sep="") k <- k +1 }} phis <- matrix(0,nfactors,nfactors) phis[lower.tri(phis)] <- means.phis phis <-phis + t(phis) diag(phis) <- 1 colnames(phis) <- rownames(phis) <- fnames } else {phiby.sum <- phis <- phiby <- NA } meanloading <- matrix(means,ncol=nfactors) colnames(meanloading) <- fnames rownames(meanloading) <- vnames faBy <- list(mean.loading= meanloading,mean.Phi= phis,faby.sum=faby.sum,Phi.sum = phiby.sum,loadings=t(faby),Phi=t(phiby),nfactors=nfactors,quant=quant,Call=cl) if(all) {faBy$fa=lapply(replicateslist,function(X) X$fa) } class(faBy) <- c("psych","faBy") return(faBy) } #not used here but helpful to understand factor.match <- function(f1,f2) { cong <- fa.congruence(f1,f2) match <- apply(cong,1,function(x) which.max(abs(x))) f2 <- fa.organize(f2,o=match) cong <- fa.congruence(f1,f2) flip <- diag(sign(diag(cong))) f2$loadings <- f2$loadings %*% flip phi <- flip %*% f2$Phi %*% flip return(list(f=f2,phi=phi)) } faMatch <- function(f1,f2){ cong <- factor.congruence(f1,f2) cn <- colnames(f2$loadings) ord <- apply(cong,1,function(x) which.max(abs(x))) if(length(unique(ord))==NCOL(cong)) { cong <- cong[,ord]} else { ord <- apply(cong,2,function(x) which.max(abs(x))) cong <- cong[ord,]} flip <- diag(sign(diag(cong))) f2$loadings <- f2$loadings[,ord] %*% flip f2$Phi <- f2$Phi[ord,ord] colnames(f2$loadings)<- cn[ord] f2$Phi <- flip %*% f2$Phi %*% flip colnames(f2$Phi) <- rownames(f2$Phi) <- cn[ord] return(f2) } #replaced with faMatch 3/1/22 "oldfaMatch" <- function(f1,f2) { fc <- factor.congruence(f1,f2) ord <- 1:ncol(fc) for(i in 1:(ncol(fc)-1)) { new <- which(abs(fc[i,])==max(abs(fc[i,])) ) old <- ord[i] ord[i] <- new ord[new] <- old } flip <- rep(1,ncol(fc)) for (i in 1:ncol(fc)) { if(fc[ord[i],ord[i]] < 0) { f2$loadings[,ord[i]] <- f2$loadings[,ord[i]] * -1 flip[i] <- -1 } if(!is.null(f2$Phi)) f2$Phi <- diag(flip) %*% f2$Phi %*% diag(flip) } # ord <- apply(fc,2,function(x) {which(abs(x)==max(abs(x)))}) f2 <- fa.organize(f2,o=ord) return(f2)} "pickgood" <- function(stats,min.n) { #just look at those cases with good data new <- list() for (i in 1: length(stats$r)) { if(!any(is.na(stats$r[[i]])) & (min(stats$n[[i]]) >= min.n)) {new[i] <- stats$r[i] } # if(min(stats$n[[i]]) >= min.n) } names(new) <- names(stats$r) return(new)}psych/R/lavaan.diagram.R0000644000176200001440000000764114524501107014600 0ustar liggesusers"lavaan.diagram" <- function(fit,main,e.size=.1,...) { if (is.null(fit@Model@GLIST$beta)) {model <- "cfa"} else {model <- "sem"} if(missing(main)) {if(model =="cfa") { main="Confirmatory structure" } else {main = "Structural model"} } mimic <- fit@Model@fixed.x if(!mimic) {#the normal case, either a cfa or a sem fx=fit@Model@GLIST$lambda #the x and y loadings #but, if it is a sem, we need to find the y variables # colnames(fx) <- fit@Model@dimNames$lambda[[2]] colnames(fx) <- fit@Model@dimNames[[1]][[2]] rownames(fx) <- fit@Model@dimNames[[1]][[1]] if(model=="sem") {Phi <- fit@Model@GLIST$beta} else { Phi <- fit@Model@GLIST$psi} Rx <- fit@Model@GLIST$theta v.labels <-fit@Model@dimNames[[1]][[1]] #either a sem or a cfa if(model == "sem" ) { x.f <- unique(fit@ParTable$rhs[fit @ParTable$op == "~"]) y.f <- unique(fit@ParTable$lhs[fit @ParTable$op == "~"]) x.f <- x.f[!x.f %in% y.f] #now, since this is a sem or we need to shrink fx and create fy # temp <- fx x.vars <- fit@ParTable$lhs[fit @ParTable$op == "=~"] %in% x.f y.vars <- !fit@ParTable$lhs[fit @ParTable$op == "=~"] %in% x.f fy <- fx[y.vars,!colnames(fx) %in% x.f,drop=FALSE] fx <- fx[x.vars,x.f,drop=FALSE] Ry <- Rx[y.vars,y.vars] } } else { model <- "mimic" } switch(model, # if(model=="cfa") { cfa= {structure.diagram(fx=fx,Phi=Phi, Rx=Rx,labels=v.labels,main=main,e.size=e.size,...)}, mimic = {#multiple indicators, multiple causes y.vars <- fit@Model@x.user.idx[[1]] #these are all the variables as numbers nx <- fit@Model@ov.x.dummy.lv.idx[[1]] fy <- as.matrix(fit@Model@GLIST$lambda[y.vars,-nx],drop=FALSE) fx <- as.matrix(fit@Model@GLIST$beta[-nx,nx],drop=FALSE) colnames(fy) <- fit@Model@dimNames[[1]][[2]][-nx] rownames(fy) <- fit@Model@dimNames[[1]][[1]][y.vars] rownames(fx) <- fit@Model@dimNames[[1]][[2]][-nx] colnames(fx) <- fit@Model@dimNames[[1]][[1]][-y.vars] # v.labels <-fit@Model@dimNames[[1]][[1]] v.labels <- c(rownames(fx),rownames(fy)) Rx <- fit@Model@GLIST$theta Phi <- fit@Model@GLIST$beta fx <- t(fx) structure.diagram(fx=fx,fy=fy,Rx=Rx,main=main,e.size=e.size,...)}, sem = { #a sem model Phi <- (fit@Model@GLIST$beta) #now draw the sem diagram structure.diagram(fx=fx,Phi=Phi,fy=fy, Rx=Rx,Ry=Ry, labels=v.labels,main=main,e.size=e.size,...) } ) #end switch } #end lavaan diagram #modified 11/6/14 to draw the regression paths #modified 11/14/18 to properly do mimic models #modified 11/11/23 to draw sem diagrams #how to figure out the lavaan object # fit@ParTable$op == "~" # fit@ParTable$rhs # fit@ParTable$lhs # fit@ParTable$lhs[fit @ParTable$op == "~"] #the y factors # fit@ParTable$rhs[fit @ParTable$op == "~"] #the regression predictors #fit@ParTable$rhs[fit@ParTable$op == "=~"] #the variables #fit@ParTable$lhs[fit @ParTable$op == "=~"] #the factors #created August 17, 2017 to allow sem.diagrams and graphs from sem output "sem.diagram" <- function(fit,main="A SEM from the sem package",...) { nvar <- ncol(fit$S) var.names <- fit$var.names tot.var <- ncol(fit$A) num.factors <- length(var.names) - nvar fx <- fit$A[1:nvar,(nvar+1):ncol(fit$A)] Phi <- fit$P[(nvar+1):tot.var,(nvar+1):tot.var] structure.diagram(fx,Phi,main=main,...) } "sem.graph" <- function(fit,out.file=NULL,main="A SEM from the sem package",...) { nvar <- ncol(fit$S) var.names <- fit$var.names tot.var <- ncol(fit$A) num.factors <- length(var.names) - nvar fx <- fit$A[1:nvar,(nvar+1):ncol(fit$A)] Phi <- fit$P[(nvar+1):tot.var,(nvar+1):tot.var] structure.graph(fx,Phi,out.file=out.file,title=main,...) } psych/R/error.crosses.R0000755000176200001440000000507212766545004014555 0ustar liggesusers"error.crosses" <- function (x,y,labels=NULL,main=NULL,xlim=NULL,ylim= NULL,xlab=NULL,ylab=NULL,pos=NULL,offset=1,arrow.len=.2,alpha=.05,sd=FALSE,add=FALSE,colors=NULL,col.arrows=NULL,col.text=NULL,...) # x and y are data frame or descriptive stats {if(is.vector(x)) {x <- describe(x)} xmin <- min(x$mean) xmax <- max(x$mean) if(sd) {max.sex <- max(x$sd,na.rm=TRUE) if(is.null(xlim)) {xlim=c(xmin - max.sex,xmax + max.sex) }} else {max.sex <- max(x$se,na.rm=TRUE)} if(is.vector(y)) {y <- describe(y)} ymin <- min(y$mean) ymax <- max(y$mean) if(sd) {max.sey <- max(y$sd,na.rm=TRUE) if(is.null(ylim)) {ylim=c(ymin - max.sey,ymax +max.sey)}} else { max.sey <- max(y$se,na.rm=TRUE) } if(is.null(xlim)) xlim=c(xmin - 2*max.sex,xmax +2*max.sex) if(is.null(ylim)) ylim=c(ymin - 2*max.sey,ymax +2*max.sey) if(is.null(main)) {if(!sd) { main = paste((1-alpha)*100,"% confidence limits",sep="") } else {main= paste("Means and standard deviations")} } if(is.null(xlab)) xlab <- "Group 1" if(is.null(ylab)) ylab <- "Group 2" if(is.null(colors)) colors <- "black" if(is.null(col.arrows)) col.arrows <- colors if(is.null(col.text)) col.text <- colors if(!add) plot(x$mean,y$mean,xlim=xlim,ylim=ylim,xlab=xlab,ylab=ylab,main=main,col=colors,...) cix <- qt(1-alpha/2,x$n-1) ciy <- qt(1-alpha/2,y$n-1) z <- dim(x)[1] if(sd) {x$se <- x$sd y$se <- y$sd cix <- ciy <- rep(1,z) } if (is.null(pos)) {locate <- rep(1,z)} else {locate <- pos} if (is.null(labels)) {labels <- rownames(x)} if (is.null(labels)) {lab <- paste("V",1:z,sep="")} else {lab <-labels} if(length(col.arrows) < z) {col.arrows <- rep(col.arrows, z)} if(length(col.text) < z) {col.text <- rep(col.text, z)} for (i in 1:z) {xcen <- x$mean[i] ycen <- y$mean[i] xse <- x$se[i] yse <- y$se[i] arrows(xcen-cix[i]* xse,ycen,xcen+ cix[i]* xse,ycen,length=arrow.len, angle = 90, code=3,col = col.arrows[i], lty = NULL, lwd = par("lwd"), xpd = NULL) arrows(xcen,ycen-ciy[i]* yse,xcen,ycen+ ciy[i]*yse,length=arrow.len, angle = 90, code=3,col =col.arrows[i], lty = NULL, lwd = par("lwd"), xpd = NULL) text(xcen,ycen,labels=lab[i],pos=locate[i],offset=offset,col = col.text[i],...) #puts in labels for all points } } #Sept 11, 2013 changed n to n-1 in call to qt (following a suggestion by Trevor Dodds) #modified Sept 15, 2016 with help from Arnaud Defaye psych/R/item.sim.R0000644000176200001440000000253410626070073013456 0ustar liggesusers"item.sim" <- function (nvar = 72 ,nsub = 500, circum = FALSE, xloading =.6, yloading = .6, gloading=0, xbias=0, ybias = 0,categorical=FALSE, low=-3,high=3,truncate=FALSE,cutpoint=0) { avloading <- (xloading+yloading)/2 errorweight <- sqrt(1-(avloading^2 + gloading^2)) #squared errors and true score weights add to 1 g <- rnorm(nsub) truex <- rnorm(nsub)* xloading +xbias #generate normal true scores for x + xbias truey <- rnorm(nsub) * yloading + ybias #generate normal true scores for y + ybias if (circum) #make a vector of radians (the whole way around the circle) if circumplex {radia <- seq(0,2*pi,len=nvar+1) rad <- radia[which(radia<2*pi)] #get rid of the last one } else rad <- c(rep(0,nvar/4),rep(pi/2,nvar/4),rep(pi,nvar/4),rep(3*pi/2,nvar/4)) #simple structure error<- matrix(rnorm(nsub*(nvar)),nsub) #create normal error scores #true score matrix for each item reflects structure in radians trueitem <- outer(truex, cos(rad)) + outer(truey,sin(rad)) item<- gloading * g + trueitem + errorweight*error #observed item = true score + error score if (categorical) { item = round(item) #round all items to nearest integer value item[(item<= low)] <- low item[(item>high) ] <- high } if (truncate) {item[item < cutpoint] <- 0 } return (item) } psych/R/print.psych.iclust.R0000644000176200001440000000476214444406576015536 0ustar liggesusers"print_psych.iclust" <- function(x,digits=2,all=FALSE,cut=NULL,sort=FALSE,...) { cat("ICLUST (Item Cluster Analysis)") cat("\nCall: ") print(x$call) if((!is.null(x$purify)) && x$purify) { cat("\nPurified Alpha:\n") print(x$purified$alpha,digits) cat("\nG6* reliability:\n") print(x$purified$G6,digits) cat("\nOriginal Beta:\n") print(x$beta,digits) cat("\nCluster size:\n") print(x$purified$size,digits) } else { cat("\noriginal Alpha:\n") print(x$alpha,digits) cat("\nG6* reliability:\n") print(x$G6,digits) cat("\nOriginal Beta:\n") print(x$beta,digits) cat("\nCluster size:\n") print(x$size,digits)} if(sort) { cat("\nItem by Cluster Structure matrix: Sorted by loading \n") load <- x$sorted$sorted if(is.null(cut)) cut <- .3 ncol <- dim(load)[2]-3 load[4:(ncol+3)] <- round(load[4:(ncol+3)],digits) fx <- as.matrix(format(load,digits=digits)) nc <- nchar(fx[1,4], type = "c") fx.1 <- fx[,1:3] fx.2 <- format(fx[,4:(3+ncol)],digits) load.2 <- load[,4:(ncol+3)] fx.2[abs(load.2)< cut] <- paste(rep(" ", nc), collapse = "") fx <- data.frame(fx.1,fx.2) print(fx,quote="FALSE") eigenvalues <- diag(t(x$pattern) %*% x$loadings) cat("\nWith Sums of squares of:\n") print(eigenvalues,digits=digits) } else {if(is.null(cut)) cut <- 0 #added 8/9/16 cat("\nItem by Cluster Structure matrix:\n") load <- unclass(x$loadings) load <- round(load,digits) fx <- format(load,digits=digits) nc <- nchar(fx[1,1], type = "c") fx[abs(load) < cut] <- paste(rep(" ", nc), collapse = "") if(is.matrix(x$clusters)) { clust <- colnames(x$clusters)[apply(abs(x$clusters),1,which.max)] pclust <- colnames(x$p.sorted$clusters)[apply(abs(x$p.sorted$clusters),1,which.max)] clust.fx <- data.frame(O=clust,P=pclust,fx)} else {clust.fx <- fx} print(clust.fx,quote="FALSE") #print(unclass(x$loadings)) eigenvalues <- diag(t(x$pattern) %*% x$loadings) cat("\nWith Sums of squares of:\n") print(eigenvalues,digits=digits) } if(!is.null(x$purified$cor)) {cat("\nPurified scale intercorrelations\n reliabilities on diagonal\n correlations corrected for attenuation above diagonal: \n") print(round(x$purified$corrected,digits=digits)) } cat("\nCluster fit = ",round(x$fit$clusterfit,digits), " Pattern fit = ",round(x$fit$patternfit,digits), " RMSR = ",round(x$fit$patternrmse,digits), "\n") }# end of print.psych.ICLUSTpsych/R/cor2dist.R0000644000176200001440000000020511607051630013451 0ustar liggesusers"cor2dist" <- function(x) { if(dim(x)[1] != dim(x)[2]) {x <- cor(x,use="pairwise")} dist <- sqrt(2*(1-x)) return(dist) }psych/R/cluster.cor.R0000644000176200001440000001736112266000607014176 0ustar liggesusers"cluster.cor" <- #added smc.items 30.12/13 to reduce the number of calls to SMC to once function(keys,r.mat,correct=TRUE,SMC=TRUE,item.smc=NULL,impute=TRUE) { #function to extract clusters according to the key vector #default is to correct for attenuation and show this above the diagonal #find the correlation matrix of scales made up of items defined in a keys matrix (e.g., extracted by factor2cluster) #takes as input the keys matrix as well as a correlation matrix of all the items tol=sqrt(.Machine$double.eps) #machine accuracy cl <- match.call() if(!is.matrix(keys)) keys <- as.matrix(keys) #keys are sometimes a data frame - must be a matrix if(any(is.na(r.mat))) {SMC=FALSE warning("Missing values in the correlation matrix do not allow for SMC's to be found")} r.mat[is.na(r.mat)] <- -9999999 #changes missing values to obviously incorrect values if(SMC && is.null(item.smc)) {item.smc <- smc(r.mat)} else {item.smc <- rep(1,dim(r.mat)[1])} #now done once in the main function covar <- t(keys) %*% r.mat %*% keys #matrix algebra is our friend but slow if we are doing this in iclust #probably better to just modify the matrix for those two rows and columns that are changing if doing an iclust var <- diag(covar) #these are the scale variances sd.inv <- 1/sqrt(var) ident.sd <- diag(sd.inv,ncol = length(sd.inv)) cluster.correl <- ident.sd %*% covar %*% ident.sd cluster.correl[abs(cluster.correl) > (1+tol)] <- NA #happens only if item correlations were missing -- we use 1+a little to avoid rounding problem key.var <- diag(t(keys) %*% keys) key.smc <- t(keys) %*% item.smc key.alpha <- ((var-key.var)/var)*(key.var/(key.var-1)) key.lambda6 <- (var - key.var + key.smc)/var key.alpha[is.nan(key.alpha)] <- 1 #if only 1 variable to the cluster, then alpha is undefined key.alpha[!is.finite(key.alpha)] <- 1 key.av.r <- key.alpha/(key.var - key.alpha*(key.var-1)) #alpha 1 = average r colnames(cluster.correl) <- colnames(keys) rownames(cluster.correl) <- colnames(keys) names(key.lambda6) <- colnames(keys) key.lambda6 <- drop(key.lambda6) # diag(r.mat) <- 0 # row.range <- apply(r.mat,1,range,na.rm=TRUE) # row.max <- pmax(abs(row.range[1,]),abs(row.range[2,])) #find the largest absolute similarity #now, try to figure out the imputed correlation for the case of NAs. if(any(is.na(cluster.correl)) && impute) { #find the missing values based upon average covariances rather than totals #first, change those bad values back to NA warning('Some of the correlations were NA and were imputed') r.mat[r.mat < -1] <- NA n.keys <- ncol(keys) keys[keys==0] <- NA #this will allow us to find average responses for (i in 1:n.keys) { #first find which variables are screwed up if(any(is.na(cluster.correl[i,]))) {#fix them for (j in 1:n.keys) {if(is.na(cluster.correl[i,j])) {#fix it temp <- mean(colMeans((keys[,i] * r.mat),na.rm=TRUE) * keys[,j],na.rm=TRUE)*key.var[i]*key.var[j] #this is the average covariance times the number of items scored adjusted.r <- temp * ident.sd[i,i]* ident.sd[j,j] cluster.correl[i,j] <- adjusted.r } } } } } sn <- key.av.r * key.var/(1-key.av.r) if (correct) {cluster.corrected <- correct.cor(cluster.correl,t(key.alpha)) result <- list(cor=cluster.correl,sd=sqrt(var),corrected= cluster.corrected,alpha=key.alpha,av.r = key.av.r,size=key.var,sn=sn,G6 =key.lambda6,Call=cl) } #correct for attenuation else { result <- list(cor=cluster.correl,sd=sqrt(var),alpha=key.alpha,av.r = key.av.r,size=key.var,sn=sn,G6 =key.lambda6,Call=cl)} class(result) <- c ("psych", "cluster.cor") return(result)} #revised August 21, 2007 to add a smidgen to 1.0 in the looking for NAs. #revised June 14, 2008 to add average.r #revised August 25, 2009 to add lambda6 #revised December 2011 to remove digits -- this is all handled in the print function #revised January 2012 to estimate values when we have missing values in the correlations #74% of the time is spent doing matrix multiplication 70 seconds for a 400 x 400 problem. "icluster.cor" <- #added to speed up iclust by just combining rows function(keys,r.mat,ivar,jvar,correct=TRUE,SMC=TRUE,item.smc=NULL,impute=TRUE) { #function to extract clusters according to the key vector #default is to correct for attenuation and show this above the diagonal #find the correlation matrix of scales made up of items defined in a keys matrix (e.g., extracted by factor2cluster) #takes as input the keys matrix as well as a correlation matrix of all the items tol=sqrt(.Machine$double.eps) #machine accuracy cl <- match.call() if(!is.matrix(keys)) keys <- as.matrix(keys) #keys are sometimes a data frame - must be a matrix #covar <- t(keys) %*% r.mat %*% keys #matrix algebra is our friend but slow if we are doing this in iclust covar <- r.mat[-jvar,-jvar] #drop the jth column and row covar[ivar,] <- covar[,ivar] <- r.mat[ivar,] + r.mat[jvar,] #probably better to just modify the matrix for those two rows and columns that are changing if doing an iclust var <- diag(covar) #these are the scale variances sd.inv <- 1/sqrt(var) ident.sd <- diag(sd.inv,ncol = length(sd.inv)) cluster.correl <- ident.sd %*% covar %*% ident.sd cluster.correl[abs(cluster.correl) > (1+tol)] <- NA #happens only if item correlations were missing -- we use 1+a little to avoid rounding problem key.var <- diag(t(keys) %*% keys) key.smc <- t(keys) %*% item.smc key.alpha <- ((var-key.var)/var)*(key.var/(key.var-1)) key.lambda6 <- (var - key.var + key.smc)/var key.alpha[is.nan(key.alpha)] <- 1 #if only 1 variable to the cluster, then alpha is undefined key.alpha[!is.finite(key.alpha)] <- 1 key.av.r <- key.alpha/(key.var - key.alpha*(key.var-1)) #alpha 1 = average r colnames(cluster.correl) <- colnames(keys) rownames(cluster.correl) <- colnames(keys) names(key.lambda6) <- colnames(keys) key.lambda6 <- drop(key.lambda6) # diag(r.mat) <- 0 # row.range <- apply(r.mat,1,range,na.rm=TRUE) # row.max <- pmax(abs(row.range[1,]),abs(row.range[2,])) #find the largest absolute similarity #now, try to figure out the imputed correlation for the case of NAs. if(any(is.na(cluster.correl)) && impute) { #find the missing values based upon average covariances rather than totals #first, change those bad values back to NA warning('Some of the correlations were NA and were imputed') r.mat[r.mat < -1] <- NA n.keys <- ncol(keys) keys[keys==0] <- NA #this will allow us to find average responses for (i in 1:n.keys) { #first find which variables are screwed up if(any(is.na(cluster.correl[i,]))) {#fix them for (j in 1:n.keys) {if(is.na(cluster.correl[i,j])) {#fix it temp <- mean(colMeans((keys[,i] * r.mat),na.rm=TRUE) * keys[,j],na.rm=TRUE)*key.var[i]*key.var[j] #this is the average covariance times the number of items scored adjusted.r <- temp * ident.sd[i,i]* ident.sd[j,j] cluster.correl[i,j] <- adjusted.r } } } } } if (correct) {cluster.corrected <- correct.cor(cluster.correl,t(key.alpha)) result <- list(cor=cluster.correl,sd=sqrt(var),corrected= cluster.corrected,alpha=key.alpha,av.r = key.av.r,size=key.var,G6 =key.lambda6,Call=cl) } #correct for attenuation else { result <- list(cor=cluster.correl,sd=sqrt(var),alpha=key.alpha,av.r = key.av.r,size=key.var,G6 =key.lambda6,Call=cl)} class(result) <- c ("psych", "cluster.cor") return(result)} #revised August 21, 2007 to add a smidgen to 1.0 in the looking for NAs. #revised June 14, 2008 to add average.r #revised August 25, 2009 to add lambda6 #revised December 2011 to remove digits -- this is all handled in the print function #revised January 2012 to estimate values when we have missing values in the correlationspsych/R/r.test.R0000644000176200001440000000626113332604155013152 0ustar liggesusers "r.test" <- function(n,r12, r34=NULL, r23=NULL,r13=NULL,r14=NULL,r24=NULL,n2=NULL,pooled=TRUE, twotailed=TRUE) { cl <- match.call() if(is.null(r34) & is.null(r13) & is.null(r23)) { #test for significance of r t <- r12*sqrt(n-2)/sqrt(1-r12^2) p <- 1-pt(abs(t),n-2) if(twotailed) p <- 2*p ci <- r.con(r12,n) result <- list(Call=cl,Test="Test of significance of a correlation",t=t,p=p,ci=ci) } else {if(is.null(r23)) { #compare two independent correlations if(is.null(r34)) {stop("You seem to be testing two dependent correlations, but have not specified the other correlation(s) correctly.")} if(!is.null(r13)) {stop("You seem to be testing two dependent correlations, but have not specified the correlation(s) correctly.")} xy.z <- 0.5*log((1+r12)/(1-r12)) xz.z <- 0.5*log((1+r34)/(1-r34)) if(is.null(n2)) n2 <- n se.diff.r <- sqrt(1/(n-3) + 1/(n2-3)) diff <- xy.z - xz.z z <- abs(diff/se.diff.r) p <- (1-pnorm(z )) if(twotailed) p <- 2*p result <- list(Call=cl,Test="Test of difference between two independent correlations",z=z,p=p) } else { if (is.null(r14)) {#compare two dependent correlations case 1 #here we do two tests of dependent correlations #figure out whether correlations are being specified by name or order if(!is.null(r34)) {if(is.null(r13)) {r13 <- r34} } if(is.null(r13)) {stop("You seem to be trying to test two dependent correlations, but have not specified the other correlation(s)")} diff <- r12-r13 determin=1-r12*r12 - r23*r23 - r13*r13 + 2*r12*r23*r13 av=(r12+r13)/2 cube= (1-r23)*(1-r23)*(1-r23) t2 = diff * sqrt((n-1)*(1+r23)/(((2*(n-1)/(n-3))*determin+av*av*cube))) p <- pt(abs(t2),n-3,lower.tail=FALSE) #changed to n-3 on 30/11/14 if(twotailed) p <- 2*p #the call is ambiguous, we need to clarify it cl <- paste("r.test(n = ",n, ", r12 = ",r12,", r23 = ",r23,", r13 = ",r13, ")") result <- list(Call=cl,Test="Test of difference between two correlated correlations",t=t2,p=p) } else { #compare two dependent correlations, case 2 z12 <- fisherz(r12) z34 <- fisherz(r34) pooledr <- (r12+r34)/2 if (pooled) { r1234= 1/2 * ((r13 - pooledr*r23)*(r24 - r23*pooledr) + (r14 - r13*pooledr)*(r23 - pooledr*r13) +(r13 - r14*pooledr)*(r24 - pooledr*r14) + (r14 - pooledr*r24)*(r23 - r24*pooledr)) z1234 <- r1234/((1-pooledr^2)*(1-pooledr^2))} else { r1234= 1/2 * ((r13 - r12*r23)*(r24 - r23*r34) + (r14 - r13*r34)*(r23 - r12*r13) +(r13 - r14*r34)*(r24 - r12*r14) + (r14 - r12*r24)*(r23 - r24*r34)) z1234 <- r1234/((1-r12^2)*(1-r34^2))} ztest <- (z12-z34)* sqrt(n-3) /sqrt(2*(1-z1234)) z <- ztest p <- (1-pnorm(abs(z) )) if(twotailed) p <- 2*p result <- list(Call=cl,Test="Test of difference between two dependent correlations",z=z,p=p) } } } class(result) <- c("psych", "r.test") return(result) } #Modified August 8, 2018 to flag improper input psych/R/faReg.R0000644000176200001440000000450414526765260012770 0ustar liggesusers#probably not needed since the loadings of the fa.extend are functionally the beta weights #redone to simply organize the results more clearly from fa.extend #and to find the t and p values of the beta weights #the beta weights are just factor extension values faReg <- faRegression <- function(r,nfactors=1,ov=NULL,dv=NULL, n.obs = NA, np.obs=NULL,correct=TRUE,rotate="oblimin",SMC=TRUE,warnings=TRUE, fm="minres",alpha=.1, omega=FALSE,cor="cor",use="pairwise",cor.correct=.5,weight=NULL,smooth=TRUE, ...) { cl <- match.call() # if(is.numeric(ev)) ev <- colnames(r)[ev] #in case we are selecting variables if(is.numeric(ov)) ov <- colnames(r)[ov] if(is.numeric(dv)) dv <- colnames(r)[dv] nv <- c(ov, dv) nv <- nfactors + length(dv) if(isCorrelation(r)) { #if the data are a correlation matrix, do an extension analysis #first find the extension analysis # n.obs <- NROW(r) fe <- fa.extend(r=r,nfactors=nfactors, ov = ov, ev = dv, n.obs = n.obs, np.obs= np.obs, correct=correct, rotate = rotate, SMC = SMC, warnings=warnings, fm =fm , alpha = alpha, omega= FALSE, cor="cor",use="pairwise",cor.correct=.5,weight=NULL,smooth=TRUE, ...) if(!isCorrelation(r)) {rdv <- cor(r[,dv],use=use)} else { rdv<- r[dv,dv]} fdv <- fe$loadings[dv,1:nfactors] } else { #do a conventional factor analysis on the ov and then find the factor score correlations with the dvs n.obs <- NROW(r) fe <- fa(r[ov], nfactors=nfactors, scores ="tenBerge") fdv <- cor(r[,dv],fe$scores, use=use) rdv <- cor(r[,dv],use=use) } Phi <- fe$Phi #Phi <- diag(1,nrow=nfactors) # we set this to identity to use the fa extension loadings #fdv <- fe$Structure[dv,1:nfactors] R <- matrix(NA,ncol=nv, nrow=nv) R[1:nfactors,1:nfactors] <- Phi R[1:nfactors,(nfactors +1):nv] <- t(fdv) R[(nfactors +1):nv, 1:nfactors] <- (fdv) R[((nfactors +1):nv),((nfactors +1):nv)] <- rdv rownames(R) <- colnames(R) <- c(colnames(Phi) , dv) diag(R) <- 1 set <- lmCor(y=dv, x = colnames(fdv), data=R ,n.obs=n.obs, plot=FALSE) result <- list(regression=set,fa.extend =fe, dv.cor =rdv, R= R, Call=cl) class(result) <- c("psych", "fa.reg") return(result) #return(set) } psych/R/cosinor.R0000755000176200001440000005543714444576725013443 0ustar liggesusers#functions for doing diurnal rhythm analyses #Heavily adapted from the circular package # #a function to estimate diurnal phase of mood data #the input is a data frame or matrix with #time of measurement (in 24 hour clock) #and then the mood measures (1 or many) #Version of October 22, 2008 #seriously revised April 12, 2009 # #modified February 14, 2015 to adjust the call to mean(x[1]) #and to make the grouping function actually work #find the best fitting phase (in hours) #cleaned up March 9, 2015 to allow a more natural calling sequence #modifed Jan - April, 2016 to make cleaner code #Added cosinor.period April 21, 2016 to iteratively fit period as an option "cosinor.period" <- function(angle,x=NULL,code=NULL,data = NULL, hours=TRUE,period=seq(23,26,1),plot=FALSE,opti=FALSE,na.rm=TRUE) { #first, organize the data in terms of the input if(!is.null(data)) { if(is.matrix(data)) data <- data.frame(data) if(is.character(angle)) angle <- which(colnames(data) == angle) if(!is.null(code)) { if(is.character(code)) codeloc <- which(colnames(data) ==code) x <- data[,c(angle,x,codeloc)] } else {x <- data[,c(angle,x)]} angle <- x[1] x <- x[-1] } else { if (is.null(x) && is.null(code)) {angle <- data.frame(angle) x <- angle angle<- angle[,1] } else {x <- data.frame(x) x <- cbind(angle,x) angle <- x[1] x <- x[-1] } } xdata <- x #we need to save this to do iterative fits old.angle <- angle per <- period fit.period <- list() for (i in 1:length(per)) { period <- per[i] if(hours) { angle <- old.angle*2*pi/period x <- cbind(angle,xdata)} #convert to radians nvar <- dim(xdata)[2] -1 if(is.null(code)) { fit <- cosinor1(angle,x[-1],period=period,opti=opti,na.rm=na.rm) #if there is a code (i.e., subject id), then do the fits for each separate subject m.resp <- mean(x[,1]) s.resp <- sd(x[,1])} else { #x <- angle fit.list <- by(x,x[code],function(x) cosinor1(angle=x[1],x=x[-c(1,which(colnames(x)== code))],period=period,opti=opti,na.rm=na.rm)) #this is the case if code is specified ncases <- length(fit.list) fit <- matrix(unlist(fit.list),nrow=ncases,byrow=TRUE) colnames(fit) <- c(paste(colnames(x)[-c(1,which(colnames(x)== code))], "phase",sep="."),paste(colnames(x)[-c(1,which(colnames(x)== code))], "fit",sep="."),paste(colnames(x)[-c(1,which(colnames(x)== code))], "amp",sep="."),paste(colnames(x)[-c(1,which(colnames(x)== code))], "sd",sep="."),paste(colnames(x)[-c(1,which(colnames(x)== code))], "mean",sep="."),paste(colnames(x)[-c(1,which(colnames(x)== code))], "intercept",sep=".")) rownames(fit) <- names(fit.list) } fit.period[[i]] <- list(fit) } x <- NA #just to avoid being told that x is a global #now, find for each variable and for each subject, that value of of fit which is maximized, and then what is the ncols <- 6 * length(x) fit.m <- matrix(unlist(fit.period),nrow=ncases,byrow=FALSE) #the fits are every nvar * 6 elements starting at nvar + 1 maxfit <- per np <- length(per) fits <- cbind(matrix(NA,nrow=ncases,ncol=nvar),fit) for (j in 1:ncases) { #do it for each subject for (i in 1:nvar) {#do it for each variable for (p in 1:np) {#find the fits for all periods #maxfit[p] <- fit.m[j,(p-1) * nvar * 6 + nvar+1] maxfit[p] <- fit.period[[p]][[1]][j,i+nvar] } max.period <- which.max(maxfit) fits[j,i] <- per[max.period] fits[j,i+nvar] <- fit.period[[max.period]][[1]][j,i] fits[j,i+2*nvar] <- fit.period[[max.period]][[1]][j,i+nvar] fits[j,i+3*nvar] <- fit.period[[max.period]][[1]][j,i + 2*nvar] fits[j,i+4*nvar] <- fit.period[[max.period]][[1]][j,i+3* nvar] fits[j,i+5*nvar] <- fit.period[[max.period]][[1]][j,i + 4*nvar] fits[j,i+6*nvar] <- fit.period[[max.period]][[1]][j,i+5* nvar] } } return(fits) } #revised 9/15/18 to handle radians correctly and to handle character names for variables "circadian.phase" <- "cosinor" <- function(angle,x=NULL,code=NULL,data = NULL, hours=TRUE,period=24,plot=FALSE,opti=FALSE,na.rm=TRUE) { #first, organize the data in terms of the input if(!is.null(data)) { if(is.matrix(data)) data <- data.frame(data) if(is.character(angle)) angle <- which(colnames(data) == angle) if(is.character(x)) x <- which(colnames(data) ==x) if(!is.null(code)) { if(is.character(code)) codeloc <- which(colnames(data) ==code) x <- data[,c(angle,x,codeloc)] } else {x <- data[,c(angle,x)]} angle <- x[1] x <- x[-1] } else { if (is.null(x) && is.null(code)) {angle <- data.frame(angle) x <- angle angle<- angle[,1] } else {x <- data.frame(x) x <- cbind(angle,x) angle <- x[1] x <- x[-1] } } if(hours) { angle <- angle*2*pi/period } #convert to radians x <- cbind(angle,x) nvar <- dim(x)[2] if(is.null(code)) { fit <- cosinor1(angle,x[-1],period=period,opti=opti,na.rm=na.rm) #if there is a code (i.e., subject id), then do the fits for each separate subject m.resp <- mean(x[,1]) s.resp <- sd(x[,1]) if(plot) {#curve(cos((*x-fit[1,1])*s.resp+m.resp)*pi/12),add=TRUE) } #this draws the first fitted variable }} else {#x <- angle fit.list <- by(x,x[code],function(x) cosinor1(angle=x[1],x=x[-c(1,which(colnames(x)== code))],period=period,opti=opti,na.rm=na.rm)) #this is the case if code is specified ncases <- length(fit.list) fit<- matrix(unlist(fit.list),nrow=ncases,byrow=TRUE) colnames(fit) <- c(paste(colnames(x)[-c(1,which(colnames(x)== code))], "phase",sep="."),paste(colnames(x)[-c(1,which(colnames(x)== code))], "fit",sep="."),paste(colnames(x)[-c(1,which(colnames(x)== code))], "amp",sep="."),paste(colnames(x)[-c(1,which(colnames(x)== code))], "sd",sep="."),paste(colnames(x)[-c(1,which(colnames(x)== code))], "mean",sep="."),paste(colnames(x)[-c(1,which(colnames(x)== code))], "intercept",sep=".")) rownames(fit) <- names(fit.list) } x <- NA #just to avoid being told that x is a global return(fit) } # cosinor1 actually does the work # it either uses a fitting function (optimize) from core R # or calls a linear regression fit "cosinor1" <- function(angle,x,period=24,opti=FALSE,na.rm=TRUE) { response <- x n.var <- dim(x)[2] if(is.null(n.var)) n.var <-1 fit <- matrix(NaN,nrow=n.var,ncol=6) for (i in 1:n.var) { if(opti) {fits <- optimize(f=phaser,c(0,24),time=angle,response=x[,i],period=period,maximum=TRUE) #iterative fit fit[i,1] <- fits$maximum fit[i,2] <- fits$objective } else {fits <- cosinor.lm2 (angle,x[,i],period=period,na.rm=na.rm) #simple linear regression based upon sine and cosine of time fit[i,1] <- fits[[1]] #this is the acrophase fit[i,2] <- fits[[2]] #this is the correlation of the fit with the data fit[i,3] <- fits[[3]] #this is the amplitude fit[i,4] <- fits[[4]] #The standard deviation of the observed scores fit[i,5] <- fits[[5]] #the mean of the observed scores fit[i,6] <- fits[[6]] #the predicted value of the observed score at the intercept } } colnames(fit) <- c("phase","fit","amplitude","sd","mean","intercept") rownames(fit) <- colnames(x) return(fit) } "phaser" <- function(phase,time,response,period) { #this is used in the iterative fit procedure phaser <- cor(cos(((time-phase)*2*pi)/period),response,use="pairwise")} #the alternative to the iterative procedure is simple linear regression of the cosine and sine "cosinor.lm2" <- function(angle,y,period=24,na.rm=na.rm) { p2 <- period/2 cos.t <- cos(angle) #angle is already in radians! sin.t <- sin(angle) dat.df <- data.frame(iv1=cos.t,iv2=sin.t,y) cor.dat <- cor(dat.df,use="pairwise") beta1 <- (cor.dat[3,1] - cor.dat[3,2] * cor.dat[1,2])/(cor.dat[1,1]-cor.dat[1,2]^2) beta2 <- (cor.dat[3,2] - cor.dat[3,1] * cor.dat[1,2])/(cor.dat[1,1]-cor.dat[1,2]^2) #note, these are standardized beta weights # phase <- ( sign(beta2) *acos( beta1/sqrt(beta1^2 + beta2^2)))*p2/pi #this is in hours phase <- atan2(beta2,beta1) #in radians intercept <- cos(phase) #the value at time 0 phase <- phase *p2/pi #convert to hours phase <- phase %% period r <- cor(cos(angle-phase*pi/p2),y,use="pairwise") sdy <- sd(y,na.rm=na.rm) meany <- mean(y,na.rm=na.rm) #amp <- r *sdy/.7223 amp <- sqrt(beta1^2 + beta2^2) #see Chow 2009 among others -- note we are finding the standardized amp intercept <- intercept * amp * sdy + meany #amp <- r * sd(y,na.rm=TRUE)/sd(cos(angle[,1]),na.rm=TRUE) #R <- sqrt(cor.dat[3,1]*beta1 + cor.dat[3,2]*beta2) #these are identical fit <- list(phase=phase,R=r,amp=amp,sd=sdy,mean=meany,intercept) return(fit) } "cosinor.plot" <- function(angle,x=NULL,data = NULL,IDloc=NULL,ID=NULL,hours=TRUE,period=24,na.rm=TRUE,ylim=NULL,ylab="observed",xlab="Time (double plotted)",main="Cosine fit",add=FALSE,multi=FALSE,typ="l",...) { if(!multi) {main <- paste("ID = ",ID," ",x)} if(!is.null(data)) { if(is.matrix(data)) data <- data.frame(data) # if(is.character(angle)) angle <- which(colnames(data) == angle) if(!is.null(IDloc)) { x <- data[data[,IDloc]==ID,c(angle,x)] angle <- x[,1,drop=FALSE] } else {x <- data[,c(angle,x)] angle <- x[,1,drop=FALSE] main <- c(main," ",IDloc)} } else { if (is.null(x) && is.null(data)) {x <- data.frame(x) x <- angle angle<- angle[1] } else {x <- data.frame(x) x <- cbind(angle,x) angle <- x[1] x <- x[-1] } } if(hours) { angle <- angle*2*pi/24 } xx <- 1:96 fit <- cosinor1(angle, x = x[2], period = period, na.rm = na.rm) m.resp <- mean(x[, 2], na.rm = TRUE) s.resp <- sd(x[, 2], na.rm = TRUE) sd.time <- sd(cos(angle[, 1]), na.rm = TRUE) if(missing(ylim)) ylim=c(min(x[,2],(m.resp - fit[1,3]),na.rm=na.rm),max(x[,2],(m.resp + fit[1,3]),na.rm=na.rm)) if(!multi | !missing(main)){main <- paste(main," ",round(fit[1,1],2)) } else {main=main} # plot(xx/2,cos((xx/2-fit[1,1])*pi/12)*s.resp*fit[1,2]/.707+ m.resp,typ=typ,ylim=ylim,...) # plot(xx/2,cos((xx/2-fit[1,1])*pi/12)*fit[1,3]*s.resp+ #m.resp,typ="l",ylim=ylim,ylab=ylab,xlab=xlab,main=main,...) #plot the lines first if(!add) {plot(xx/2,cos((xx/2-fit[1,1])*pi/12)*s.resp*fit[1,2]/.707+ m.resp,typ=typ,ylim=ylim,ylab=ylab,xlab=xlab,main=main,...) } else { points(xx/2,cos((xx/2-fit[1,1])*pi/12)*s.resp*fit[1,2]/.707+ m.resp,typ=typ,ylim=ylim,ylab=ylab,xlab=xlab,main=main,...) } if(!multi) { points(c(x[,1],x[,1] + 24),rep(x[,2],2),...)} else {points(xx/2,cos((xx/2-fit[1,1])*pi/12)*s.resp*fit[1,2]/.707+ m.resp,typ="l",...)} #this draws the first fitted variable } #Added March 26, 2015 to do split half (first/second) reliabilities "circadian.reliability" <- function(angle,x=NULL,code=NULL,data = NULL,min=16, oddeven=FALSE,hours=TRUE,period=24,plot=FALSE,opti=FALSE,na.rm=TRUE) { cl <- match.call() if(!is.null(data)) { if(is.character(angle)) angle <- which(colnames(data) == angle) if(!is.null(code)) { if(is.character(code)) codeloc <- which(colnames(data) ==code) x <- data[,c(angle,x,codeloc)] } else {x <- data[,c(angle,x)]} angle <- x[1] x <- x[-1] } else { if (is.null(x) && is.null(code)) {x <- angle angle<- angle[,1] } else {x <- cbind(angle,x) angle <- x[1] x <- x[-1] } } if(hours) { angle <- angle*2*pi/period x <- cbind(angle,x) } n.obs <- dim(x)[1] if(is.null(code)) { fit <- cosinor.rel(angle,x,period=period,na.rm=na.rm) #if there is a code (i.e., subject id), then do the fits for each separate subject m.resp <- mean(x[,1]) s.resp <- sd(x[,1]) } else { fit.list <- by(x,x[,code],function(x) cosinor.rel(angle=x[1],x=x[-c(1,which(colnames(x)== code))],min=min,oddeven=oddeven,na.rm=na.rm)) #this is the case if code is specified ncases <- length(fit.list) fit <- matrix(unlist(fit.list),nrow=ncases,byrow=TRUE) colnames(fit) <- c(paste(colnames(x)[-c(1,which(colnames(x)== code))], "phase1",sep="."),paste(colnames(x)[-c(1,which(colnames(x)== code))], "phase2",sep="."),paste(colnames(x)[-c(1,which(colnames(x)== code))], "fit1",sep="."),paste(colnames(x)[-c(1,which(colnames(x)== code))], "fit2",sep=".")) rownames(fit) <- names(fit.list) } nvar <-ncol(fit)/4 r <- circadian.cor(fit[,1:(nvar*2)]) r.fit <- cor(fit[,(nvar*2+1):ncol(fit)],use="pairwise") splithalf <- split.fit <- rep(NA,nvar) for (i in 1:nvar) {splithalf[i] <- r[i,(nvar+i)] split.fit[i] <- r.fit[i,(nvar+i)]} rel <- splithalf * 2/(1+splithalf) fit.rel <- split.fit * 2/(1+split.fit) names(rel) <- paste(colnames(x)[-c(1,which(colnames(x)== code))]) names(fit.rel) <- paste(colnames(x)[-c(1,which(colnames(x)== code))]) # x <- NA #just to avoid being told that x is a global #now do the F test between the two splits split.F <- circadian.split.F(fit) result <- list(phase.rel=rel,fit.rel=fit.rel,split.F = split.F, splits=fit,Call=cl) class(result) <- c("psych","circadian","reliability") return(result) } # cosinor.rel actually does the work # or calls a linear regression fit "cosinor.rel" <- function(angle,x,min=16,oddeven=FALSE,period=24,na.rm=TRUE) { response <- x n.var <- dim(x)[2] n.obs <- dim(x)[1] if(is.null(n.var)) n.var <-1 fit <- matrix(NaN,nrow=n.var,ncol=4) if(n.obs >= min) { for (i in 1:n.var) { if(oddeven) {fits1 <- cosinor.lm2 (angle[seq(1,n.obs,2),1],x[seq(1,n.obs,2),i],na.rm=na.rm) fits2 <- cosinor.lm2 (angle[seq(2,n.obs,2),1],x[seq(2,n.obs,2),i],na.rm=na.rm) } else { fits1 <- cosinor.lm2 (angle[1:n.obs/2,1],x[1:n.obs/2,i],na.rm=na.rm) fits2 <- cosinor.lm2 (angle[(n.obs/2+1):n.obs,1],x[(n.obs/2+1):n.obs,i],na.rm=na.rm) #simple linear regression based upon sine and cosine of time} #simple linear regression based upon sin and cosine of time } fit[i,1] <- fits1[[1]] #this is the acrophase fit[i,3] <- fits1[[2]] #this is the correlation of the fit with the data fit[i,2] <- fits2[[1]] #this is the acrophase fit[i,4] <- fits2[[2]] #this is the correlation of the fit with the data } } colnames(fit) <- c("phase","phase2","fit","fit2") rownames(fit) <- colnames(x) return(fit) } "circadian.split.F" <- function(angle,hours=TRUE,na.rm=TRUE) { nvar <- ncol(angle)/4 stats1 <- circadian.stats(angle[,1:nvar]) stats2 <- circadian.stats(angle[,(nvar+1):(nvar*2)]) pool <- rbind(angle[,1:nvar],angle[,(nvar+1):(nvar*2)]) all <- circadian.stats(pool) allR <- all$n * all$R within <- matrix(c(stats1$n*stats1$R,stats2$n*stats2$R),ncol=2) rownames(within) <- rownames(all) ngroups <- 2 SSb <- rowSums(within) - allR SSw <- all$n - rowSums(within) dfw <- all$n - ngroups MSw <- SSw/dfw dfb = ngroups -1 MSb <- SSb/dfb F <- MSb/MSw prob <- 1-pf(F,dfb,dfw) F.df <- data.frame(SSb= SSb,dfb=dfb,MSb=MSb,SSw=SSw,dfw=dfw,MSw=MSw,F=F,prob=prob) result<- list(pooled =all,group1 =stats1, group2=stats2 ,F=F.df) class(result) <- c("psych","circadian") return(result) } ## # #find the mean phase of output from cosiner or any other circular data set #can find the mean phase of data in radians or hours (default) # "circadian.mean" <- function(angle,data=NULL,hours=TRUE,na.rm=TRUE) { if(!is.null(data)) angle <- data[,angle] if(hours) { angle <- angle*2*pi/24 } x <- cos(angle) y <- sin(angle) if (is.vector(angle)) { mx <- mean(x,na.rm=na.rm) my <- mean(y,na.rm=na.rm) } else { mx <- colMeans(x,na.rm=na.rm) my <- colMeans(y,na.rm=na.rm) } mean.angle <- sign(my) * acos((mx)/sqrt(mx^2+my^2)) # mean.angle <- atan(my/mx) #according to circular stats, but the other form is clearer if (hours) {mean.angle <- mean.angle*24/(2*pi) mean.angle[mean.angle <= 0] <- mean.angle[mean.angle<=0] + 24} return(mean.angle) } "circadian.sd" <- function(angle,data=NULL,hours=TRUE,na.rm=TRUE) { if(!is.null(data)) angle <- data[,angle] if(hours) { angle <- angle*2*pi/24 } nvar <- dim(angle)[2] if(is.null(nvar)) nvar <- 1 x <- cos(angle) y <- sin(angle) if(nvar > 1) { mx <- colSums(x,na.rm=na.rm) my <- colSums(y,na.rm=na.rm) n.obs <- colSums(!is.na(angle))} else { mx <- sum(x,na.rm=na.rm) my <- sum(y,na.rm=na.rm) n.obs <- sum(!is.na(angle))} R <- sqrt(mx^2+my^2)/n.obs mean.angle <- sign(my) * acos((mx/n.obs)/R) Rvar <- 1 - R sd <- sqrt(-2 * log(R)) #for (i in 1:nvar) {#the logic is that larger deviations are weighted more, up to the sin(theta) # var[i] <- sum(sin(angle[,i] -mean.angle[i])^2 ,na.rm=na.rm) } #n.obs <- colSums(!is.na(angle)) ##but these are in radians! if(hours) {#sd <- sd * 24/(pi*2) Rvar <- Rvar * 24/(pi*2)} return(list(Rvar=Rvar,sd =sd,R= R)) } "circadian.stats" <- function(angle,data=NULL,hours=TRUE,na.rm=TRUE) { cl <- match.call() means <- circadian.mean(angle=angle,data=data,hours=hours,na.rm=na.rm) csd <- circadian.sd(angle=angle,hours=hours,na.rm=na.rm) if(!is.null(data)) angle <- data[,angle] if(length(means)>1 ) { n.obs <- colSums(!is.na(angle))} else {n.obs <- sum(!is.na(angle)) } R <- csd$R if(hours) {sd <- csd$sd*24/(2*pi)} else {sd <- csd$sd} z <- n.obs * R^2 p <- exp(-z) result<- data.frame(n=n.obs,mean=means,sd=sd,R,z=z,p=p) #result <- list(n=n.obs,mean=means,sd=sd,R,z=z,p=p,call=cl) class(result) <- c("psych","circadian","data.frame") return(result) } "circadian.F" <- function(angle,group,data=NULL,hours=TRUE,na.rm=TRUE) { if(!is.null(data)) {angle <- data[,angle] group <- data[,group]} stats <- by(angle,group,circadian.stats) all <- circadian.stats(angle) allR <- all$n * all$R nR <- lapply(stats,function(x) x$n * x$R) ngroups <- length(nR) within <- matrix(unlist(nR),ncol=ngroups) rownames(within) <- rownames(all) SSb <- rowSums(within) - allR SSw <- all$n - rowSums(within) dfw <- all$n - ngroups MSw <- SSw/dfw dfb = ngroups -1 MSb <- SSb/dfb F <- MSb/MSw prob <- 1-pf(F,dfb,dfw) F.df <- data.frame(SSb= SSb,dfb=dfb,MSb=MSb,SSw=SSw,dfw=dfw,MSw=MSw,F=F,prob=prob) result<- list(pooled =all,bygroup = stats,F=F.df) class(result) <- c("psych","circadian") return(result) } print_circadian <- function(x,short=TRUE,digits=2) { if(!is.null(x$Call)) {cat("Call: ") print(x$Call)} cat("\nCircadian Statistics :\n") if(!is.null(x$F)) { cat("\nCircadian F test comparing groups :\n") print(round(x$F,digits)) if(short) cat("\n To see the pooled and group statistics, print with the short=FALSE option") } if(!is.null(x$pooled) && !short) { cat("\nThe pooled circadian statistics :\n") print( x$pooled)} if(!is.null(x$bygroup) && !short) {cat("\nThe circadian statistics by group:\n") print(x$bygroup)} #if(!is.null(x$result)) print(round(x$result,digits)) if(!is.null(x$phase.rel)) { cat("\nSplit half reliabilities are split half correlations adjusted for test length\n") x.df <- data.frame(phase=x$phase.rel,fits=x$fit.rel) print(round(x.df,digits)) } if(is.data.frame(x)) {class(x) <- "data.frame" print(round(x,digits=digits)) } } ## The circular correlation matrix of phase data #adapted from the circStats package #with various modifications for the study of mood data #one change is not use atan but rather use cosine over length # "circadian.cor" <- function(angle,data=NULL,hours=TRUE,na.rm=TRUE) { if(!is.null(data)) angle <- data[,angle] if(hours) { angle <- angle*2*pi/24 } nvar <- dim(angle)[2] correl <- diag(nvar) x <- cos(angle) y <- sin(angle) mx <- colMeans(x,na.rm=na.rm) my <- colMeans(y,na.rm=na.rm) mean.angle <- sign(my) * acos((mx)/sqrt(mx^2+my^2)) for (i in 1:nvar) {#the logic is that larger deviations are weighted more, up to the sin(theta) for (j in 1:i) {covar <- sum(sin(angle[,i] -mean.angle[i]) *sin(angle[,j] -mean.angle[j]),na.rm=na.rm) correl[i,j] <- correl[j,i] <- covar} } var <- diag(correl)/colSums(!is.na(angle)) sd <- diag(sqrt(1/diag(correl))) correl <- sd %*% correl %*% sd colnames(correl) <- rownames(correl) <- colnames(angle) return(correl) } #to find the correlation of a linear variable (e.g., a personality trait) with a circular one (e.g., phase) "circadian.linear.cor" <- function(angle,x=NULL,data=NULL,hours=TRUE) { if(!is.null(data)) angle <- data[,angle] if(hours) { angle <- angle*2*pi/24 } if(is.null(x)) {x <- angle[2:dim(angle)[2]] angle <- angle[1]} cos.angle <- cos(angle) sin.angle <- sin(angle) cor.cos <- cor(cos.angle,x,use="pairwise") cor.sin <- cor(sin.angle,x,use="pairwise") if(!is.vector(angle)) {cor.cs <- diag(cor(cos.angle,sin.angle,use="pairwise"))} else {cor.cs <- cor(cos.angle,sin.angle,use="pairwise")} R <- sqrt((cor.cos^2 + cor.sin^2 - 2 * cor.cos * cor.sin * cor.cs)/(1-cor.cs^2))*sign(cor.cos) return(R) } "circadian.plot" <- function(angle,x=NULL,hours=TRUE,title="Polar plot") { if(hours) { angle <- angle*2*pi/24 } x1 <- cos(angle) * x y1 <- sin(angle) * x maxx <- max(x) plot(x1,y1,axes=FALSE,xlab="",ylab="",xlim=c(-maxx,maxx),ylim=c(-maxx,maxx),asp=1) segments <- 51 angles <- (0:segments) * 2 * pi/segments unit.circle <- cbind(cos(angles), sin(angles)) points(unit.circle*maxx,typ="l") text(maxx,0,"24",pos=4) text(-maxx,0,"12",pos=2) text(0,maxx,"6",pos=3) text(0,-maxx,"18",pos=1) } "circular.mean" <- function(angle,na.rm=TRUE) { x <- cos(angle) y <- sin(angle) if (is.vector(angle)) { mx <- mean(x,na.rm=na.rm) my <- mean(y,na.rm=na.rm) } else { mx <- colMeans(x,na.rm=na.rm) my <- colMeans(y,na.rm=na.rm) } mean.angle <- sign(my) * acos((mx)/sqrt(mx^2+my^2)) #mean.angle <- atan(my/mx) #according to circular stats, but the other form is clearer return(mean.angle) } "circular.cor" <- function(angle,na.rm=TRUE) { nvar <- dim(angle)[2] correl <- diag(nvar) x <- cos(angle) y <- sin(angle) mx <- colMeans(x,na.rm=na.rm) my <- colMeans(y,na.rm=na.rm) mean.angle <- sign(my) * acos((mx)/sqrt(mx^2+my^2)) for (i in 1:nvar) {#the logic is that larger deviations are weighted more, up to the sin(theta) for (j in 1:i) {covar <- sum(sin(angle[,i] -mean.angle[i]) *sin(angle[,j] -mean.angle[j])) correl[i,j] <- correl[j,i] <- covar} } var <- diag(correl) sd <- diag(sqrt(1/diag(correl))) correl <- sd %*% correl %*% sd colnames(correl) <- rownames(correl) <- colnames(angle) return(list(correl,var)) } #deprecated "circadian.linear.cor.2" <- function(angle,x,hours=TRUE) { if(hours) { angle <- angle*2*pi/24 } cos.angle <- cos(angle) sin.angle <- sin(angle) cor.cos <- cor(cos.angle,x,use="pairwise") cor.sin <- cor(sin.angle,x,use="pairwise") if(!is.vector(angle)) {cor.cs <- diag(cor(cos.angle,sin.angle))} else {cor.cs <- cor(cos.angle,sin.angle)} R <- sqrt((cor.cos^2 + cor.sin^2 - 2 * cor.cos * cor.sin * cor.cs)/(1-cor.cs^2)) return(R) }psych/R/VSS.simulate.R0000644000176200001440000000322611025622432014221 0ustar liggesusers"VSS.sim" <- function(ncases=1000,nvariables=16,nfactors=4,meanloading=.5,dichot=FALSE,cut=0) #generates a simple structure factor matrix #with nfactors { weight=sqrt(1-meanloading*meanloading) #loadings are path coefficients theta=matrix(rnorm(ncases*nfactors),nrow=ncases,ncol=nvariables) #generates nfactor independent columns, repeated nvariable/nfactor times) error=matrix(rnorm(ncases*nvariables),nrow=ncases,ncol=nvariables) #errors for all variables items=meanloading*theta+weight*error #observed score = factor score + error score if(dichot) {items <- (items[,1:nvariables] >= cut) items <- items + 0} return(items) } "VSS.simulate" <- function(ncases=1000,nvariables=16,nfactors=4,meanloading=.5,dichot=FALSE,cut=0) #generates a simple structure factor matrix #with nfactors { weight=sqrt(1-meanloading*meanloading) #loadings are path coefficients theta=matrix(rnorm(ncases*nfactors),nrow=ncases,ncol=nvariables) #generates nfactor independent columns, repeated nvariable/nfactor times) error=matrix(rnorm(ncases*nvariables),nrow=ncases,ncol=nvariables) #errors for all variables items=meanloading*theta+weight*error #observed score = factor score + error score if(dichot) {items <- (items[,1:nvariables] >= cut) items <- items + 0} return(items) } psych/R/ICLUST.R0000644000176200001440000001724314346210700012733 0ustar liggesusers#ICLUST - a function to form homogeneous item composites # originally based upon Revelle, W. (1979). Hierarchical cluster analysis and the internal structure of tests. Multivariate Behavioral Research, 14, 57-74. # but much changed over the years # pseudo code # find similarity matrix # original is either covariance or correlation # corrected is disattenuated #find most similar pair #if size of pair > min size, apply beta criterion # if beta new > min(beta 1, beta 2) combine pair #update similarity matrix #repeat until finished #then report various summary statistics #ICLUST is the main function and calls other routines "ICLUST" <- function (r.mat,nclusters=0,alpha=3,beta=1,beta.size=4,alpha.size=3,correct=TRUE, correct.cluster=TRUE,reverse=TRUE,beta.min=.5,output=1,digits=2,labels=NULL,cut=0,n.iterations=0,title="ICLUST",plot=TRUE,weighted=TRUE,cor.gen =TRUE,SMC=TRUE,purify=TRUE,diagonal=FALSE ) { iclust(r.mat,nclusters,alpha,beta,beta.size,alpha.size,correct,correct.cluster,reverse,beta.min,output,digits,labels,cut,n.iterations,title,plot,weighted,cor.gen,SMC,purify,diagonal)} "iclust" <- function (r.mat,nclusters=0,alpha=3,beta=1,beta.size=4,alpha.size=3,correct=TRUE,correct.cluster=TRUE,reverse=TRUE,beta.min=.5,output=1,digits=2,labels=NULL,cut=0,n.iterations=0,title="ICLUST",plot=TRUE,weighted=TRUE,cor.gen =TRUE,SMC=TRUE,purify=TRUE,diagonal=FALSE ) {#should allow for raw data, correlation or covariances #ICLUST.options <- list(n.clus=1,alpha=3,beta=1,beta.size=4,alpha.size=3,correct=TRUE,correct.cluster=TRUE,reverse=TRUE,beta.min=.5,output=1,digits=2,cor.gen=TRUE) cl <- match.call() if(is.null(labels)) {labels <- colnames(r.mat)} else {if((length(labels)==1) && (!labels)) labels <- NULL} #modified October 11, 2011 ICLUST.debug <- FALSE ICLUST.options <- list(n.clus=nclusters,alpha=alpha,beta=beta,beta.size=beta.size,alpha.size=alpha.size,correct=correct,correct.cluster=correct.cluster,reverse=reverse,beta.min=beta.min,output=output,digits=digits,weighted=weighted,cor.gen=cor.gen,SMC=SMC) if(dim(r.mat)[1]!=dim(r.mat)[2]) {r.mat <- cor(r.mat,use="pairwise") } else {r.mat <- cov2cor(r.mat)} #cluster correlation matrices, find correlations if not square matrix -- added the conversion from covariances to correlations, March, 2012 if(!is.matrix(r.mat)) {r.mat <- as.matrix(r.mat)} # for the case where we read in a correlation matrix as a data.frame nvar <- dim(r.mat)[2] if(nvar < 3 ) {message("Cluster analysis of items is only meaningful for more than 2 variables. Otherwise, you will find one cluster that is just the composite of the two. Beta = Alpha = 2*r/(1+r). Have you made a mistake? \n Try calling the alpha function to give some trivial statistics.") stop() } if(is.null(colnames(r.mat))) {colnames(r.mat) <- paste("V",1:nvar,sep="")} if(is.null(rownames(r.mat))) {rownames(r.mat) <- paste("V",1:nvar,sep="")} #added 24/4/15 to check for bad data if(any(is.na(r.mat))) { bad <- TRUE tempr <-r.mat wcl <-NULL while(bad) { wc <- table(which(is.na(tempr), arr.ind=TRUE)) #find the correlations that are NA wcl <- c(wcl,as.numeric(names(which(wc==max(wc))))) tempr <- r.mat[-wcl,-wcl] if(any(is.na(tempr))) {bad <- TRUE} else {bad <- FALSE} } cat('\nLikely variables with missing values are ',colnames(r.mat)[wcl],' \n') stop("I am sorry: missing values (NAs) in the correlation matrix do not allow me to continue.\nPlease drop those variables and try again." ) } #added this 30/12/13 to improve speed if(ICLUST.options$SMC) {smc.items <- smc(r.mat)} else {smc.items <- rep(1,nvar)} iclust.results <- ICLUST.cluster(r.mat,ICLUST.options,smc.items) #ICLUST.cluster does all the work - the answers are in iclust.results loads <- cluster.loadings(iclust.results$clusters,r.mat,SMC=SMC) #summarize the results by using cluster.loadings -- these are the original values if(is.matrix(iclust.results$clusters) ) { eigenvalue <- diag(t(loads$pattern) %*% loads$loading) sorted.cluster.keys.ord <- order(eigenvalue,decreasing=TRUE) sorted.cluster.keys <- iclust.results$clusters[,sorted.cluster.keys.ord] loads <- cluster.loadings(sorted.cluster.keys,r.mat,SMC=SMC) #these are the original cluster loadings with clusters sorted by eigenvalues iclust.results$clusters <- sorted.cluster.keys cluster.beta <- iclust.results$results[colnames(sorted.cluster.keys),"beta"] names(cluster.beta) <- colnames(sorted.cluster.keys)} else {sorted.cluster.keys <- iclust.results$clusters} #these are fine fits <- cluster.fit(r.mat,as.matrix(loads$loadings),iclust.results$clusters,diagonal) #check this sorted <- ICLUST.sort(ic.load=loads,labels=labels,cut=cut) #sort the loadings (again?) This is done for sorted output if desired if(is.matrix(sorted.cluster.keys) ) {cluster.beta <- iclust.results$results[colnames(sorted.cluster.keys),"beta"] names(cluster.beta) <- colnames(sorted.cluster.keys) } else { number.of.clusters <- dim(iclust.results$results)[1] cluster.beta <- iclust.results$results[number.of.clusters,"beta"]} #now, iterate the cluster solution to clean it up (if desired) clusters <- as.matrix(iclust.results$clusters) #just in case there is only one cluster -- these are now sorted by eigen value if (dim(clusters)[2]==0 ) {warning('no items meet the specification time1')} old.clusters <- clusters old.fit <- fits$clusterfit if (ICLUST.debug) {print(paste('clusters ',clusters))} if(purify) {clusters <- factor2cluster(loads,cut=cut) #this will assign items to the clusters with the highest loadings -- might be different from original solution clusters <- as.matrix(clusters) } #in case only one cluster if (dim(clusters)[2]==0 ) {warning('no items meet the specification stage 2',immediate.=TRUE)} if (ICLUST.debug) {print(paste('clusters ',clusters)) print(paste('loads ',loads))} loads <- cluster.loadings(clusters,r.mat,SMC=SMC) if (n.iterations > 0) { #it is possible to iterate the solution to perhaps improve it for (steps in 1:n.iterations) { # loads <- cluster.loadings(clusters,r.mat,SMC=SMC) clusters <- factor2cluster(loads,cut=cut) if(dim(clusters)[2]!=dim(old.clusters)[2]) {change <- 999 loads <- cluster.loadings(clusters,r.mat,SMC=SMC) } else { change <- sum(abs(clusters)-abs(old.clusters)) } #how many items are changing? fit <- cluster.fit(r.mat,as.matrix(loads$loadings),clusters,diagonal) old.clusters <- clusters print(paste("iterations ",steps," change in clusters ", change, "current fit " , fit$clusterfit)) if ((abs(change) < 1) | (fit$clusterfit <= old.fit)) {break} #stop iterating if it gets worse or there is no change in cluster definitions old.fit <- fit$cluster.fit } } p.fit <- cluster.fit(r.mat,as.matrix(loads$loadings),clusters,diagonal) p.sorted <- ICLUST.sort(ic.load=loads,labels=labels,cut=cut,keys=TRUE) #at this point, the clusters have been cleaned up, but are not in a sorted order. Sort them purified <- cluster.cor(p.sorted$clusters,r.mat,SMC=SMC,item.smc=smc.items) class(loads$loadings) <- "loading" result <- list(title=title,clusters=iclust.results$clusters,corrected=loads$corrected,loadings=loads$loadings,pattern=loads$pattern,G6 = loads$G6,fit=fits,results=iclust.results$results,cor=loads$cor,Phi=loads$cor,alpha=loads$alpha,beta=cluster.beta,av.r = loads$av.r,size=loads$size, sorted=sorted, p.fit = p.fit,p.sorted = p.sorted,purified=purified,purify=purify,call=cl) #if(plot && requireNamespace('Rgraphviz')) {ICLUST.rgraph(result,labels=labels,title=title,digits=digits)} result$order <- iclust.diagram(result,labels=labels,main=title,digits=digits,plot=plot) class(result) <- c("psych","iclust") return(result) } psych/R/grouplag.R0000644000176200001440000000044711274424431013553 0ustar liggesusersgrouplag <- function(x,gr,varnum=NULL) { result <- by(x,gr,lag,varnum) return(result) } lag <- function(x,varnum) {if(!is.null(varnum)) x <- x[-varnum] cname <- colnames(x) results <- cbind(x, x[row(x[,1,drop=FALSE])+1,]) colnames(results) <- c(cname,paste(cname,2,sep="")) return(results) } psych/R/fa.parallel.poly.R0000644000176200001440000001426313761466035015110 0ustar liggesusers #parallel analysis of polychoric factor analysis "fa.parallel.poly" <- function(x,n.iter=10,SMC=TRUE,fm="minres",correct=TRUE,sim=FALSE,fa="both",global=TRUE) { p <- .05 cl <- match.call() .Deprecated("fa.parallel.poly", msg = "fa.parallel.poly is deprecated. Please use the fa.parallel function with the cor='poly' option.") n.obs <- dim(x)[1] tx <- table(as.matrix(x)) nx <- dim(tx)[1] if(dim(tx)[1] ==2) {tet <- tetrachoric(x,correct=correct) typ = "tet" if(sim) { tx.item <- matrix(apply(x,2,table),ncol=ncol(x)) px <- matrix(tx.item/colSums(tx.item),ncol=2,byrow=TRUE) } } else {tet <- mixedCor(x,global=global) typ = "poly"} cat("\n") #to try to clear the progress bar flush(stdout()) r <- tet$rho f <- fa(r,n.obs=n.obs,SMC = SMC,covar=FALSE,fm=fm) #call fa with the appropriate parameters f$Call <- cl fl <- f$loadings #this is the original nvar <- dim(fl)[1] e.values <- list(pc =list(),fa = list()) replicates <- list() rep.rots <- list() for (trials in 1:n.iter) { progressBar(trials,n.iter,"fa.parallel.poly") bad <- TRUE while(bad) { xs <- matrix(apply(x,2,function(y) sample(y,n.obs,replace=TRUE)),ncol=nvar) #do it column wise tets <- polychoric(xs,progress=FALSE,global=global) r <- tets$rho bad <- any(is.na(r)) } values.samp <- eigen(tets$rho)$values e.values[["pc"]][[trials]] <- values.samp fs <- fa(r,n.obs=n.obs,SMC = SMC,covar=FALSE,fm=fm) #call fa with the appropriate parameters e.values[["fa"]][[trials]] <- fs$values if(sim && (typ=="tet")) {xsim <- matrix(apply(px,1,function(x) sample(2,n.obs,TRUE,prob=x)),ncol=nvar) rho.sim <- tetrachoric(xsim,correct=correct) values.sim <- eigen(rho.sim$rho)$values e.values[["pc.sim"]][[trials]] <- values.sim fsim <- fa(rho.sim$rho,n.obs=n.obs,SMC = SMC,covar=FALSE,fm=fm) #call fa with the appropriate parameters e.values[["fa.sim"]][[trials]] <- fsim$values } } cat("\n") #to try to clear the progress bar ei.pc <-describe(matrix(unlist(e.values$pc),ncol=nvar,byrow=TRUE)) #eigen values of pcs ei.fa <- describe(matrix(unlist(e.values$fa),ncol=nvar,byrow=TRUE)) #eigen values of fa fa.test <- which(!(f$values > ei.fa$mean))[1]-1 pc.test <- which(!(f$e.values > ei.pc$mean))[1] -1 #e.stats <- list(fa.values=f$values,pc.values=f$e.values,pc.sim=ei.pc,fa=ei.fa,nfact=fa.test,ncomp = pc.test) if(sim && (typ=="tet")) {eis.pc <- describe(matrix(unlist(e.values$pc.sim),ncol=nvar,byrow=TRUE)) #eigen values of pcs eis.fa <- describe(matrix(unlist(e.values$fa.sim),ncol=nvar,byrow=TRUE)) #eigen values of fa } else {eis.pc <- NULL eis.fa <- NULL } #results <- list(fa = f,rho=tet$rho,tau=tet$tau,n.obs=n.obs,Call= cl,e.values=e.values,e.stats=e.stats) results <- list(rho=tet$rho,tau=tet$tau,n.obs=n.obs,Call= cl,fa.values=f$values,pc.values=f$e.values,pc.sim=ei.pc,fa.sim=ei.fa,pcs.sim=eis.pc,fas.sim=eis.fa,nfact=fa.test,ncomp = pc.test) class(results) <- c("psych","parallel") cat('\n See the graphic output for a description of the results\n') plot.poly.parallel(results,fa=fa) return(results) } #written May 8 2011 #corrected December 27, 2011 to pass the fm correctly (had always forced minres) #modified Sept 16, 2013 to use mixed.cor instead of polychoric (more robust) #modified Oct 2, 2013 to add the ability to do random data as well #modified Oct 22, 2013 to allow choice of what to plot #modified 1/15/14 to pass the global parameter #modified 3/24/14 to check for bad resamples "plot.poly.parallel" <- function(x,show.legend=TRUE,fa="both",...) { e.values <- x$pc.values values <- x$fa.values pcm <- x$pc.sim$mean fam <- x$fa.sim$mean switch(fa, both = { plot(e.values,type="b", main = "Eigen values of tetrachoric/polychoric matrix",ylab="Eigen values of original and simulated factors and components",ylim=c(0,max(e.values)) ,xlab="Factor Number",pch=4,col="blue") points(values,type ="b",pch=2,col="blue") points(pcm,type="l",lty="dotted",pch=2,col="red") points(fam,type="l",lty="dashed",pch=2,col="red") if(!is.null(x$pcs.sim)) points(x$pcs.sim$mean,type="l",lty="dashed",pch=2,col="red") if(!is.null(x$fas.sim)) points(x$fas.sim$mean,type="l",lty="dashed",pch=2,col="red") }, fa = {plot(values,type="b", main = "Eigen values of tetrachoric/polychoric matrix",ylab="Eigen values of original and simulated factors ",ylim=c(0,max(e.values)) ,xlab="Factor Number",pch=2,col="blue") points(fam,type="l",lty="dashed",pch=2,col="red")}, pc = {plot(e.values,type="b", main = "Eigen values of tetrachoric/polychoric matrix",ylab="Eigen values of original and simulated factors and components",ylim=c(0,max(e.values)) ,xlab="Factor Number",pch=4,col="blue") points(pcm,type="l",lty="dotted",pch=2,col="red")} ) if(show.legend) { switch(fa, both ={ if(!is.null(x$pcs.sim)) { legend("topright", c("PC Actual Data", " PC Resampled Data"," PC Simulated Data","FA Actual Data", " FA Resampled Data", " FA Simulated Data"), col = c("blue","red","red","blue","red","red"),pch=c(4,NA,NA,2,NA,NA), text.col = "green4", lty = c("solid","dotted","dashed","solid","dotted","dashed"), merge = TRUE, bg = 'gray90') } else {legend("topright", c("PC Actual Data", " PC Resampled Data","FA Actual Data", " FA Resampled Data"), col = c("blue","red","blue","red"),pch=c(4,NA,2,NA), text.col = "green4", lty = c("solid","dotted","solid","dotted"), merge = TRUE, bg = 'gray90') } }, fa ={ legend("topright", c("FA Actual Data", " FA Resampled Data"), col = c("blue","red"),pch=c(2,NA), text.col = "green4", lty = c("solid","dotted"), merge = TRUE, bg = 'gray90') }, pc = { legend("topright", c("PC Actual Data", " PC Resampled Data"), col = c("blue","red"),pch=c(4,NA), text.col = "green4", lty = c("solid","dotted"), merge = TRUE, bg = 'gray90') } ) } fa.test <- which(!(values > fam))[1]-1 pc.test <- which(!(e.values > pcm))[1] -1 cat("Parallel analysis suggests that ") cat("the number of factors = ",fa.test, " and the number of components = ",pc.test,"\n") } #modified Oct 2, 2013 to plot (if desired) the simulated as well as the resampled data #modified Oct 22, 2013 to allow the plot to choose beteween both, fa, and pc. psych/R/sim.item.R0000644000176200001440000002477214414304650013466 0ustar liggesusers"sim.item" <- function (nvar = 72 ,nsub = 500, circum = FALSE, xloading =.6, yloading = .6, gloading=0, xbias=0, ybias = 0,categorical=FALSE, low=-3,high=3,truncate=FALSE,threshold=NULL) { avloading <- (xloading+yloading)/2 errorweight <- sqrt(1-(avloading^2 + gloading^2)) #squared errors and true score weights add to 1 g <- rnorm(nsub) truex <- rnorm(nsub)* xloading +xbias #generate normal true scores for x + xbias truey <- rnorm(nsub) * yloading + ybias #generate normal true scores for y + ybias if(!is.null(threshold)) {if (length(threshold) < nvar) threshold <- sample(threshold, nvar, replace=TRUE)} if (circum) #make a vector of radians (the whole way around the circle) if circumplex {radia <- seq(0,2*pi,len=nvar+1) rad <- radia[which(radia<2*pi)] #get rid of the last one } else rad <- c(rep(0,nvar/4),rep(pi/2,nvar/4),rep(pi,nvar/4),rep(3*pi/2,nvar/4)) #simple structure error<- matrix(rnorm(nsub*(nvar)),nsub) #create normal error scores #true score matrix for each item reflects structure in radians trueitem <- outer(truex, cos(rad)) + outer(truey,sin(rad)) item<- gloading * g + trueitem + errorweight*error #observed item = true score + error score if (categorical) { if(is.null(threshold)){ item = round(item) #round all items to nearest integer value item[(item<= low)] <- low item[(item>high) ] <- high } else { i <- 1:nvar item <- t(t(item [,i]) > threshold[i]) + 0 } } #if (truncate) {item[item < cutpoint] <- 0 } #replaced with threshold colnames(item) <- paste("V",1:nvar,sep="") return (item) } "sim.spherical" <- function (simple=FALSE, nx=7,ny=12 ,nsub = 500, xloading =.55, yloading = .55, zloading=.55, gloading=0, xbias=0, ybias = 0, zbias=0,categorical=FALSE, low=-3,high=3,truncate=FALSE,threshold=NULL) { nvar <- nx * (ny-1) if(!is.null(threshold)) {if (length(threshold) < nvar) threshold <- sample(threshold, nvar, replace=TRUE)} errorweight <- sqrt(1-(xloading^2 + yloading^2 + zloading^2+ gloading^2)) #squared errors and true score weights add to 1 g <- rnorm(nsub) truex <- rnorm(nsub) +xbias #generate normal true scores for x + xbias truey <- rnorm(nsub) + ybias #generate normal true scores for y + ybias truez <- rnorm(nsub) + zbias #generate normal true scores for y + ybias true <- matrix(c(g,truex,truey,truez),nsub) #make a vector of radians (the whole way around the circle) if circumplex if(!simple) {f1 <- rep(cos(seq(0,pi,length.out = nx)),each=ny-1)*xloading f2 <- rep(cos(seq(0,2*pi*(ny-1)/ny,length.out=ny-1)),nx)*yloading f3 <- rep(sin(seq(0,2*pi*(ny-1)/ny,length.out=ny-1)),nx)*zloading f2 <- f2 * (1-f1^2) #added to make the cylinder a sphere f3 <- f3 * (1-f1^2) f <- matrix(c(rep(gloading,(ny-1)*nx),f1,f2,f3),(ny-1)*nx) } else { #simple structure -- nvar <- 4*ny f1 <- rep(c(1,0,-1,0),each=ny) f2 <- rep(c(1,0,-1,0),ny) f3 <- rep(c(0,1,0,-1),ny) f <- matrix(c(rep(gloading,ny*4),f1,f2,f3),4*ny) } error<- matrix(rnorm(nsub*(nvar)),nsub) #create normal error scores #true score matrix for each item reflects structure in radians item <- true %*% t(f) + errorweight*error #observed item = true score + error score if (categorical) { if(is.null(threshold)){ item = round(item) #round all items to nearest integer value item[(item<= low)] <- low item[(item>high) ] <- high } else { i <- 1:nvar item <- t(t(item [,i]) > threshold[i]) + 0 } } #if (truncate) {item[item < cutpoint] <- 0 } colnames(item) <- paste("V",1:nvar,sep="") return (item) } "sim.rasch" <- function (nvar = 5 , n = 500, low=-3,high=3,d=NULL, a=1,mu=0,sd=1) { if(is.null(d)) {d <- seq(low,high,(high-low)/(nvar-1))} theta <- rnorm(n,mu,sd) item <- matrix(t(1/(1+exp(a*t(-theta %+% t( d))))),n,nvar) #now convert these probabilities to outcomes item[] <- rbinom(n*nvar, 1, item) colnames(item) <- paste("V",1:nvar,sep="") result <- list(items=item,tau=d,theta=theta) return (result) } "sim.irt" <- function (nvar = 5 ,n = 500,low=-3,high=3,a=NULL,c=0,z=1,d=NULL, mu=0,sd=1,mod="logistic",theta=NULL) { if(mod=="logistic") {result <- sim.npl(nvar,n,low,high,a,c,z,d,mu,sd,theta)} else {result <- sim.npn(nvar,n,low,high,a,c,z,d,mu,sd,theta)} return (result) } "sim.npn" <- function (nvar = 5 ,n = 500, low=-3,high=3,a=NULL,c=0,z=1,d=NULL,mu=0,sd=1,theta=NULL) { if(is.null(d)) {d <- seq(low,high,(high-low)/(nvar-1))} else {if(length(d)==1) d <- rep(d,nvar)} if(is.null(a)) {a <- rep(1,nvar)} if(is.null(theta)) {theta <- rnorm(n,mu,sd)} # the latent variable item <- matrix(t(c+(z-c)*pnorm(a*t(theta %+% t(- d)))),n,nvar) #need to transpose and retranpose to get it right #now convert these probabilities to outcomes item[] <- rbinom(n*nvar, 1, item) colnames(item) <- paste("V",1:nvar,sep="") result <- list(items=item,discrimination=a,difficulty=d,gamma=c,zeta=z,theta=theta) return (result) } "sim.npl" <- function (nvar = 5 ,n = 500, low=-3,high=3,a=NULL,c=0,z=1,d=NULL,mu=0,sd=1,theta=NULL) { if(is.null(d)) {d <- seq(low,high,(high-low)/(nvar-1))} else {if(length(d)==1) d <- rep(d,nvar)} if(is.null(a)) {a <- rep(1,nvar)} if(is.null(theta)) {theta <- rnorm(n,mu,sd)} item <- matrix(t(c+(z-c)/(1+exp(a*t((-theta %+% t( d)))))),n,nvar) item[] <- rbinom(n*nvar, 1, item) #now convert these probabilities to outcomes colnames(item) <- paste("V",1:nvar,sep="") result <- list(items=item,discrimination=a,difficulty=d,gamma=c,zeta=z,theta=theta) return (result) } "sim.poly" <- function (nvar = 5 ,n = 500,low=-2,high=2,a=NULL,c=0,z=1,d=NULL, mu=0,sd=1,cat=5,mod="logistic",theta=NULL) { if(mod=="normal") {result <- sim.poly.npn(nvar,n,low,high,a,c,z,d,mu,sd,cat,theta)} else {result <- sim.poly.npl(nvar,n,low,high,a,c,z,d,mu,sd,cat,theta)} return (result) } "sim.poly.npn" <- function (nvar = 5 ,n = 500, low=-2,high=2,a=NULL,c=0,z=1,d=NULL,mu=0,sd=1,cat=5,theta=NULL) { cat <- cat - 1 if(is.null(d)) {d <- seq(low,high,(high-low)/(nvar-1))} else {if(length(d)==1) d <- rep(d,nvar)} if(is.null(a)) {a <- rep(1,nvar)} if(is.null(theta)) {theta <- rnorm(n,mu,sd)} # the latent variable item <- matrix(t(c+(z-c)*pnorm(a*t(theta %+% t(- d)))),n,nvar) #need to transpose and retranpose to get it right #now convert these probabilities to outcomes item[] <- rbinom(n*nvar, cat, item) colnames(item) <- paste("V",1:nvar,sep="") result <- list(items=item,discrimination=a,difficulty=d,gamma=c,zeta=z,theta=theta) return (result) } "sim.poly.npl" <- function (nvar = 5 ,n = 500, low=-2,high=2,a=NULL,c=0,z=1,d=NULL,mu=0,sd=1,cat=5,theta=NULL) { cat <- cat - 1 if(is.null(d)) {d <- seq(low,high,(high-low)/(nvar-1))} else {if(length(d)==1) d <- rep(d,nvar)} if(is.null(a)) {a <- rep(1,nvar)} if(is.null(theta)) {theta <- rnorm(n,mu,sd)} item <- matrix(t(c+(z-c)/(1+exp(a*t((-theta %+% t( d)))))),n,nvar) item[] <- rbinom(n*nvar, cat, item) colnames(item) <- paste("V",1:nvar,sep="") result <- list(items=item,discrimination=a,difficulty=d,gamma=c,zeta=z,theta=theta) return (result) } "sim.poly.ideal" <- function (nvar = 5 ,n = 500,low=-2,high=2,a=NULL,c=0,z=1,d=NULL, mu=0,sd=1,cat=5,mod="logistic") { if(mod=="normal") {result <- sim.poly.ideal.npn(nvar,n,low,high,a,c,z,d,mu,sd,cat)} else {result <- sim.poly.ideal.npl(nvar,n,low,high,a,c,z,d,mu,sd,cat)} return (result) } "sim.poly.ideal.npl.absolute" <- function (nvar = 5 ,n = 500, low=-2,high=2,a=NULL,c=0,z=1,d=NULL,mu=0,sd=1,cat=5) { cat <- cat -1 #binomial is based upon one fewer than categories if(is.null(d)) {d <- seq(low,high,(high-low)/(nvar-1))} else {if(length(d)==1) d <- rep(d,nvar)} if(is.null(a)) {a <- rep(1,nvar)} theta <- rnorm(n,mu,sd) item <- matrix(t(c+(z-c)/(1+exp(a*t(abs(-theta %+% t( d)))))),n,nvar) item[] <- rbinom(n*nvar, cat, item) colnames(item) <- paste("V",1:nvar,sep="") result <- list(items=item,discrimination=a,difficulty=d,gamma=c,zeta=z,theta=theta) return (result) } "sim.poly.ideal.npl" <- function (nvar = 5 ,n = 500, low=-2,high=2,a=NULL,c=0,z=1,d=NULL,mu=0,sd=1,cat=5,theta=NULL) { cat <- cat -1 #binomial is based upon one fewer than categories if(is.null(d)) {d <- seq(low,high,(high-low)/(nvar-1))} else {if(length(d)==1) d <- rep(d,nvar)} if(is.null(a)) {a <- rep(1,nvar)} if(is.null(theta)) {theta <- rnorm(n,mu,sd)} item <- 2*matrix((c+(z-c)*exp(a*t(-theta %+% t( d)))/(1+2*exp(a*t(-theta %+% t( d))) + exp(a*t(2*(-theta %+% t( d)))))),n,nvar,byrow=TRUE) p <- item item[] <- rbinom(n*nvar, cat, item) colnames(item) <- paste("V",1:nvar,sep="") result <- list(p=p,items=item,discrimination=a,difficulty=d,gamma=c,zeta=z,theta=theta) return (result) } "sim.poly.ideal.npn" <- function (nvar = 5 ,n = 500, low=-2,high=2,a=NULL,c=0,z=1,d=NULL,mu=0,sd=1,cat=5) { warning("Not ready for prime time") cat <- cat -1 #binomial is based upon one fewer than categories if(is.null(d)) {d <- seq(low,high,(high-low)/(nvar-1))} else {if(length(d)==1) d <- rep(d,nvar)} if(is.null(a)) {a <- rep(1,nvar)} theta <- rnorm(n,mu,sd) # the latent variable item <- matrix(t(c+(z-c)*pnorm(abs(a*t(theta %+% t(- d))))),n,nvar) #need to transpose and retranpose to get it right #now convert these probabilities to outcomes item[] <- rbinom(n*nvar, cat, item) colnames(item) <- paste("V",1:nvar,sep="") result <- list(items=item,discrimination=a,difficulty=d,gamma=c,zeta=z,theta=theta) return (result) } "sim.poly.npl.m" <- function (nvar = 5 ,n = 500, low=-2,high=2,a=NULL,c=0,z=1,d=NULL,mu=0,sd=1,cat=5,theta=NULL) { cat <- cat - 1 if(is.null(d)) {d <- seq(low,high,(high-low)/(nvar-1))} else {if(length(d)==1) d <- rep(d,nvar)} if(is.null(a)) {a <- rep(1,nvar)} if(is.null(theta)) {theta <- rnorm(n,mu,sd)} item <- matrix(t(c+(z-c)/(1+exp(a*t((-theta %+% t( d)))))),n,nvar) item[] <- rbinom(n*nvar, cat, item) colnames(item) <- paste("V",1:nvar,sep="") result <- list(items=item,discrimination=a,difficulty=d,gamma=c,zeta=z,theta=theta) return (result) } #simulate a particular polychoric structure R with specified marginals (m) #Modified December 17,2013 to actually work "sim.poly.mat" <- function(R,m,n) { e <- eigen(R) v <- pmax(e$values,0) nvar <- ncol(R) ncat <- ncol(m) X <- matrix(rnorm(nvar*n),n) X <- t(e$vectors %*% sqrt(diag(v)) %*% t(X)) marg <- m Y <- matrix(0,ncol=n,nrow=nvar) for(i in 1:(ncat)) { Y[t(X) > marg[,i]] <- i } return(t(Y)) }psych/R/item.dichot.R0000644000176200001440000000177510700462364014147 0ustar liggesusers"item.dichot" <- function (nvar = 72, nsub = 500, circum = FALSE, xloading = 0.6, yloading = 0.6, gloading = 0, xbias = 0, ybias = 0, low = 0, high = 0) { avloading <- (xloading + yloading)/2 errorweight <- sqrt(1 - (avloading^2 + gloading^2)) g <- rnorm(nsub) truex <- rnorm(nsub) * xloading + xbias truey <- rnorm(nsub) * yloading + ybias if (circum) { radia <- seq(0, 2 * pi, len = nvar + 1) rad <- radia[which(radia < 2 * pi)] } else rad <- c(rep(0, nvar/4), rep(pi/2, nvar/4), rep(pi, nvar/4), rep(3 * pi/2, nvar/4)) error <- matrix(rnorm(nsub * (nvar)), nsub) trueitem <- outer(truex, cos(rad)) + outer(truey, sin(rad)) item <- gloading * g + trueitem + errorweight * error nvar2 <- nvar/2 iteml <- (item[,(1:nvar2)*2 -1] >= low) itemh <- (item[,(1:nvar2)*2] >= high) item <- cbind(iteml,itemh)+0 return(item) } #revised October 2 to make difficulty and direction of factor loading unconfoundedpsych/R/correct.cor.R0000644000176200001440000000250113571764453014164 0ustar liggesusers"correct.cor" <- function(x,y) { n=dim(x)[1] { diag(x) <- y if (n> 1) { for (i in 2:n) { k=i-1 for (j in 1:k) { x[j,i] <- x[j,i]/sqrt(y[i]*y[j]) } #fix the upper triangular part of the matrix }} return(x) }} "rangeCorrection" <- function(r,sdu,sdr,sdxu=NULL,sdxr=NULL,case=2) { if (!is.null(sdxu)) case <- 4 # switch(case, { result <- sqrt(1-(sdr^2/sdu^2) *(1-r^2))}, { result <- ( r * sdu/(sdr* sqrt(1-r^2 + r^2*(sdu^2/sdr^2))))}, {result <- NULL}, {result <- r * (sdr/sdu)*(sdxr/sdxu) + sqrt((1-(sdr/sdu)^2) * (1- (sdxr/sdxu)^2 )) } ) return(result) } #Find the Kaiser - Meyer -Olkin criterion #note that the correct formula is in Kaiser 1974, not 1970 "KMO" <- function(r) { cl <- match.call() if(!isCorrelation(r)) r <- cor(r,use="pairwise") Q <- try(solve(r)) if(inherits(Q, as.character("try-error"))) {message("matrix is not invertible, image not found") Q <- r} S2 <- diag(1/diag(Q)) IC <- S2 %*% Q %*% S2 Q <- Image <- cov2cor(Q) diag(Q) <- 0 diag(r) <- 0 sumQ2 <- sum(Q^2) sumr2 <- sum(r^2) MSA <- sumr2/(sumr2 + sumQ2) MSAi <- colSums(r^2)/(colSums(r^2) + colSums(Q^2)) results <- list(MSA =MSA,MSAi = MSAi, Image=Image,ImCov = IC,Call=cl) class(results) <- c("psych","KMO") return(results) }psych/R/kaiser.R0000644000176200001440000000123413015672526013211 0ustar liggesusers#added Promax option (11/5/16) "kaiser" <- function(f,rotate="oblimin", m=4,pro.m=4) { if((!is.matrix(f)) && (!is.data.frame(f))) {f <- as.matrix(f$loadings)} else {f <- as.matrix(f)} if(!requireNamespace('GPArotation')) stop('GPArotation is required for the Kaiser normalization') h2 <- diag(f %*% t(f)) weighted <- f/sqrt(h2) #rotated <- call(paste('GPArotation',rotate,sep="::"),list(weighted)) if(rotate == "Promax") {rotated <- Promax(weighted,m=pro.m)} else { rotated <- do.call(getFromNamespace(rotate,'GPArotation'),list(weighted))} normalized <- rotated$loadings * sqrt(h2) rotated$loadings <- normalized class(rotated) <- c("psych","fa") return(rotated)} psych/R/testRetest.r0000644000176200001440000003264514444435167014160 0ustar liggesusers#Developed January 2018 while writing an article on reliability #modified March 2021 in response to some helpful suggestions by Jacob Lee "testRetest" <- function(t1,t2=NULL,keys=NULL,id="id",time= "time",select=NULL,check.keys=TRUE,warnings=TRUE,lmer=TRUE,sort=TRUE) { cl <- match.call() #first, some basic checks of the data #there are several ways we can input the data #x and y as time 1 and time 2 #x includes a time variable and an id variable x <- t1 y <- t2 if(NCOL(x) ==1) {just.test <-TRUE } else {just.test <- FALSE} keys.orig <- keys #first check if we have a y variable, if not, then create it if(is.null(y)) {n.times <- table(x[time]) #sort the data by id and time if(id %in% colnames(x)) { if(sort) x <- psychTools::dfOrder(x,c(time,id),) } #don't sort if we don't have ids y <- x[x[,time] == names(n.times)[2],] x <- x[x[,time] == names(n.times)[1],] } else { if(sort) {x <- psychTools::dfOrder(x,c(time,id),) y <- psychTools::dfOrder(y,c(time,id),) }} #only take matching cases if(NROW(x) != NROW(y) ) {warning("The number of subjects in x do not match those in y") not.missing.x <- x[,id] %in% y[,id] not.missing.y <- y[,id] %in% x[,id] missing.idx <- x[!not.missing.x,id] missing.idy <- y[!not.missing.y,id] cat("\nThe non-matched subjects were\n",missing.idx, missing.idy,"\n I have deleted them") x <- x[not.missing.x,] y <- y[not.missing.y,] } n.obs <- NROW(x) if(!just.test) { if(!is.null(select)) {items <- select x <- x[select] y <- y[select] } if(is.null(keys)){ items <- colnames(x) [!colnames(x) %in% c(id,time)]} else {items <- keys } #first check if we should reverse any items and convert location numbers (if specified) to location names n.items <- length(items) if(is.character(items)) { temp <- rep(1,n.items) temp [strtrim(items,1)=="-"] <- -1 if(any( temp < 0) ) {items <- sub("-","",items) } } else {temp <- sign(items) items <- colnames(x)[abs(items)] } #check for bad input -- the Mollycoddle option if(any( !(items %in% colnames(x)) )) { cat("\nVariable names in keys are incorrectly specified. Offending items are ", items[which(!(items %in% colnames(x)))],"\n") stop("I am stopping because of improper input in the scoring keys. See the list above for the bad item(s). ")} x <- x[,items,drop=FALSE] y <- y[,items,drop=FALSE] #these are the means of the unreversed items if(NCOL(x) > 1) {mean.x <- colMeans(x,na.rm=TRUE) mean.y <- colMeans(y,na.rm=TRUE) } min.item <- min(x[items],na.rm=TRUE) max.item <- max(x[items],na.rm=TRUE) miny.item <- min(y[items],na.rm=TRUE) maxy.item <- max(y[items],na.rm=TRUE) if(any(temp < 0)) { #flip items that are negatively keyed x[items[temp <0]] <- max.item- x[items[temp < 0]] + min.item y[items[temp <0]] <- maxy.item- y[items[temp < 0]] + miny.item } #x and y are now scored in the direction of the keys select <- items if(any(colnames(x[select]) !=colnames(y[select]))) {stop("Variable names must match across tests")} p1 <- pca(x) p2 <- pca(y) #Even though, if given keys, we have already flipped them, we check one more time # if(is.null(keys) ) { keys <- rep(1,n.items) } else {keys <- temp } keys <- rep(1,n.items) if((any(p1$loadings < 0)) | (any(p2$loadings < 0))) {if (check.keys) {if(warnings) message("Some items were negatively correlated with total scale and were automatically reversed.\n This is indicated by a negative sign for the variable name.") keys[p1$loadings < 0] <- -1 } else { if(is.null(keys) && warnings ) {message("Some items were negatively correlated with the total scale and probably \nshould be reversed. \nTo do this, run the function again with the 'check.keys=TRUE' option") if(warnings) cat("Some items (",rownames(p1$loadings)[(p1$loadings < 0)],") were negatively correlated with the total scale and \nprobably should be reversed. \nTo do this, run the function again with the 'check.keys=TRUE' option") }} } if(any(keys < 0)) { #then find the x and y scores newx <- t(t(x[select]) * keys + (keys < 0)* (max.item + min.item) ) #these are now rescaled in the keyed direction - but we have already done this if given a keys vector newy <- t(t(y[select]) * keys + (keys < 0)* (maxy.item + miny.item)) } else { newx <- x[select] newy <- y[select] } xscore <- rowMeans(newx,na.rm=TRUE) yscore <- rowMeans(newy,na.rm=TRUE) r12 <- cor(xscore,yscore,use="pairwise") #then correlate them to get test retest r #Now find the alpha for the x and y scales x.alpha <- alpha.1(cov(newx,use="pairwise")) y.alpha <- alpha.1(cov(newy,use="pairwise")) xy.alpha <- rbind(unlist(x.alpha),unlist(y.alpha)) rownames(xy.alpha) <- c("x","y") colnames(xy.alpha) <- c("raw G3","std G3","G6","av.r","S/N","se","lower","upper","var.r") #then correlate each matched item dxy <- dist(newx,newy) rqq <- dxy$rqq #now find the item over subjects correlation rii <- rep(NA,n.items) for (j in (1:n.items)) { if(!(( is.na(sd(x[,items[j]],na.rm=TRUE))) | (is.na(sd(y[,items[j]],na.rm=TRUE))))) { rii[j] <- cor(x[,items[j]],y[,items[j]],use="pairwise")} } #ok, the data seem ok lets create a dummy variable and do the lmer on it n.obs <- min(NROW(newx),NROW(newy)) xy.df <- data.frame(id = rep(1:n.obs,2), time=c(rep(1,n.obs), rep(2,n.obs)), rbind(newx,newy),row.names=1:(2*n.obs)) #this is getting it ready for mlr1 } else {#The case of just two tests, no items xy.df <- data.frame(id=rep(1:n.obs,2),time=c(time=c(rep(1,n.obs), rep(2,n.obs))),rbind(x,y)[,1],row.names=1:(2*n.obs)) no.items <- TRUE } #Now, the data are ready for lmer #This is same as # ml <- mlr(xy.df,aov=FALSE,lmer=TRUE) We now need to grab the best of multilevel reliabiity to do the next part if(!just.test) { if(lmer) {ml <- mlr1(xy.df)} else {ml <- list(n.obs=n.obs,n.items=n.items)} if(is.null(keys.orig)) keys.orig <- rep(1,n.items) item.stats <- data.frame(rii=rii,p1=unclass(p1$loadings),p2=unclass(p2$loadings),mean1 = mean.x, mean2=mean.y, keys=keys,keys.orig=keys.orig) colnames(item.stats)[2:3] <- c("PC1", "PC2") key <- rownames(item.stats) key[item.stats$keys < 0] <- paste0("-", key[item.stats$keys < 0]) scores <- data.frame(pca1 = p1$scores,pca2 = p2$scores,t1scores =xscore, t2scores = yscore,rqq=rqq,dxy=dxy$dxy,t1sd=dxy$sdx,t2sd=dxy$sdy) result <- list(r12=r12,alpha=xy.alpha,rqq=rqq,dxy=dxy,item.stats=item.stats, scores=scores,xy.df=xy.df,key=key,ml=ml,Call=cl) } else { if(just.test) {r12 = cor(x,y,use="pairwise") ml <- mlr2(xy.df) result <- list(r12 =r12,ml=ml, Call=cl)} } class(result) <- c("psych", "testRetest") return(result) } ######## alpha.1 <- function(C,R=NULL) { n <- dim(C)[2] alpha.raw <- (1- tr(C)/sum(C))*(n/(n-1)) if(is.null(R)) R <- cov2cor(C) sumR <- sum(R) alpha.std <- (1- n/sumR)*(n/(n-1)) smc.R <- smc(R) G6 <- (1- (n-sum(smc.R))/sumR) av.r <- (sumR-n)/(n*(n-1)) R.adj <- R diag(R.adj) <- NA var.r <- var(as.vector(R.adj),na.rm=TRUE) mod1 <- matrix(av.r,n,n) Res1 <- R - mod1 GF1 = 1- sum(Res1^2)/sum(R^2) Rd <- R - diag(R) diag(Res1) <- 0 GF1.off <- 1 - sum(Res1^2)/sum(Rd^2) sn <- n*av.r/(1-av.r) # Q = (2 * n^2/((n-1)^2*(sum(C)^3))) * (sum(C) * (tr(C^2) + (tr(C))^2) - 2*(tr(C) * sum(C^2))) #corrected 1/15/16 Q = (2 * n^2/((n - 1)^2 * (sum(C)^3))) * (sum(C) * (tr(C%*%C) + (tr(C))^2) - 2 * (tr(C) * sum(C%*%C))) #correction from Tamaki Hattori result <- list(raw=alpha.raw,std=alpha.std,G6=G6,av.r=av.r,sn=sn,Q=Q,GF1,GF1.off,var.r = var.r) return(result) } print_psych.testRetest<- function(x,digits=2,short=FALSE,...) { cat("\nTest Retest reliability ") cat("\nCall: ") print(x$Call) cat('\nNumber of subjects = ',x$ml$n.obs, " Number of items = ", x$ml$n.items) if(x$ml$n.items > 1) { #The normal case cat("\n Correlation of scale scores over time" , round(x$r12,digits)) cat("\n Alpha reliability statistics for time 1 and time 2 \n") rownames(x$alpha) <- c("Time 1", "Time 2") print( round(x$alpha,digits)) meanrii <- mean(x$item.stats$rii,na.rm=TRUE) meanrqq <- mean(x$rqq,na.rm = TRUE) sdrqq <- sd(x$rqq,na.rm = TRUE) meandqq <- mean(x$dxy$dxy,na.rm=TRUE) cat("\n Mean between person, across item reliability = ",round(meanrii,digits)) cat("\n Mean within person, across item reliability = ",round(meanrqq,digits)) cat( "\nwith standard deviation of " ,round(sdrqq,digits) ,"\n") cat("\n Mean within person, across item d2 = ",round(meandqq,digits)) temp <- x x <- x$ml if(!is.null(x$R1F)) cat("\nR1F = ",round(x$R1F,digits) , "Reliability of average of all items for one time (Random time effects)") if(!is.null(x$RkF)) cat("\nRkF = ",round(x$RkF,digits) , "Reliability of average of all items and both times (Fixed time effects)") if(!is.null(x$R1R)) cat("\nR1R = ",round(x$R1R,digits),"Generalizability of a single time point across all items (Random time effects)") if(!is.null(x$R2R)) cat("\nRkR = ",round(x$RkR,digits),"Generalizability of average time points across all items (Fixed time effects)") if(!is.null(x$Rc)) cat("\nRc = ",round(x$Rc,digits),"Generalizability of change (fixed time points, fixed items) ") if(!is.null(x$RkRn) ) cat("\nRkRn = ",round(x$RkRn,digits),"Generalizability of between person differences averaged over time (time nested within people)") if(!is.null(x$Rcn)) cat("\nRcn = ",round(x$Rcn,digits),"Generalizability of within person variations averaged over items (time nested within people)") x <- temp if(!is.null(x$ml$components)) {cat("\nMultilevel components of variance\n") print(round(x$ml$components,digits))} if(!short) { cat("\n With Item statistics \n") print(round(x$item.stats[-7],digits)) } else {cat("\n To see the item.stats, print with short=FALSE. \nTo see the subject reliabilities and differences, examine the 'scores' object.") } } else { cat("\nTest Retest Reliability of two tests", print(round(x$r12,digits))) cat("\nMultilevel components of variance\n") print(round(x$ml$components,digits)) } } ####### #grab the best parts of multilevel reliability mlr1 <- function(x,na.action= "na.omit" ) { long <- NULL id <- "id" time <- "time" n.obs <- NROW(x)/2 items <- colnames(x) [!colnames(x) %in% c("id","time")] n.items <- length(items) n.time <- 2 long <- data.frame(id = rep(1:n.obs,2), time=rep(1:2,each = n.obs),stack(x[items])) colnames(long)[4] <- "items" #just to make it clearer mod.lmer <- lme4::lmer(values ~ 1 + (1 | id) + (1 | time) + (1 | items) + (1 | id:time)+ (1 | id:items)+ (1 | items :time), data=long,na.action=na.action) vc <- lme4::VarCorr(mod.lmer) MS_id <- vc$id[1,1] MS_time <- vc$time[1,1] MS_items <- vc$items[1,1] # MS_pxt <- vc[[1]][[1]] # MS_pxitem <- vc[[2]][[1]] # MS_txitem <- vc[[3]][[1]] #changed to named locations (which were incorrect when doing by numbers) MS_pxt <- vc[["id:time"]][[1]] MS_pxitem <- vc[["id:items"]][[1]] MS_txitem <- vc[["items:time"]][[1]] error <- MS_resid <- (attributes(vc)$sc)^2 s.lmer <- s.aov <- summary(mod.lmer) MS.df <- data.frame(variance= c(MS_id, MS_time ,MS_items, MS_pxt, MS_pxitem, MS_txitem, MS_resid,NA)) rownames(MS.df) <- c("ID","Time","Items","ID x time", "ID x items", "time x items", "Residual","Total") MS.df["Total",] <- sum(MS.df[1:7,1],na.rm=TRUE) MS.df["Percent"] <- MS.df/MS.df["Total",1] lmer.MS <- MS.df #save these #fixed time, not random time, R1f <- (MS_id + MS_pxitem/n.items)/((MS_id + MS_pxitem/n.items + error/( n.items))) #average of both times Rkf <- (MS_id + MS_pxitem/n.items)/((MS_id + MS_pxitem/n.items + error/(n.time * n.items))) R1r <- (MS_id + MS_pxitem/n.items)/((MS_id + MS_pxitem/n.items + MS_time + MS_pxt + error/( n.items))) #per Sean Lane Rkr <- (MS_id + MS_pxitem/n.items)/((MS_id + MS_pxitem/n.items + MS_time/n.time + MS_pxt/n.time + error/( n.time * n.items))) Rc <- (MS_pxt)/(MS_pxt + error/n.items) result <- list(n.obs = n.obs, n.items=n.items, components = MS.df,R1F= R1f,RkF =Rkf,R1R = R1r,RkR = Rkr,Rc=Rc) return(result) } mlr2 <- function(x,na.action=na.omit) { #these treats the case of just two tests with no items, we want the variance components long <- x #it is actually already in long format n.obs <- NROW(long) /2 n.items <- 1 mod.lmer <- lme4::lmer(values ~ 1 + (1 | id) + (1 | time) , data=long,na.action=na.action) vc <- lme4::VarCorr(mod.lmer) MS_id <- vc$id[1,1] error <- MS_resid <- (attributes(vc)$sc)^2 MS.df <- data.frame(variance= c(MS_id, MS_resid,NA)) rownames(MS.df) <- c("ID","Residual","Total") MS.df["Total",] <- sum(MS.df[1:2,1],na.rm=TRUE) MS.df["Percent"] <- MS.df/MS.df["Total",1] Rxx <- MS_id/MS.df["Total",1] result <- list(n.obs=n.obs,n.items=n.items,components = MS.df) } #find level, scatter, and pattern by subject dist <- function(x,y) { x.level <- rowMeans(x,na.rm=TRUE) y.level <- rowMeans(y,na.rm=TRUE) n.obs <- NROW(x) sdxi <- apply(x,1,function(xx) sd(xx,na.rm=TRUE)) sdyi <- apply(y,1,function(xx) sd(xx,na.rm=TRUE)) dxy <- rowMeans((x - y)^2,na.rm=TRUE) rxy <- rep(NA,n.obs) tx <- t(x) ty <- t(y) for(i in 1:n.obs) { if(!( (is.na(sdxi[i])) | (sdxi[i]==0) | (is.na(sdyi[i]) | sdyi[i]==0) ) ) { rxy[i] <- cor(tx[,i],ty[,i],use="pairwise")} } dist.df <- data.frame(x.level=x.level,y.level=y.level,sdx=sdxi,sdy = sdyi,dxy=dxy,rqq=rxy) return(dist.df) }psych/R/alpha.R0000644000176200001440000003511314551342061013015 0ustar liggesusers"alpha" <- function(x,keys=NULL,cumulative=FALSE,title=NULL,max=10,na.rm=TRUE,check.keys=FALSE,n.iter=1,delete=TRUE,use="pairwise",warnings=TRUE,n.obs=NULL,impute=NULL,discrete=TRUE) { #find coefficient alpha given a data frame or a matrix alpha.1 <- function(C,R) { n <- dim(C)[2] alpha.raw <- (1- tr(C)/sum(C))*(n/(n-1)) sumR <- sum(R) alpha.std <- (1- n/sumR)*(n/(n-1)) smc.R <- smc(R) G6 <- (1- (n-sum(smc.R))/sumR) av.r <- (sumR-n)/(n*(n-1)) R.adj <- R[lower.tri(R)] # diag(R.adj) <- NA # var.r <- var(as.vector(R.adj),na.rm=TRUE) var.r <- var(R.adj,na.rm=TRUE) # med.r <- median(R.adj,na.rm=TRUE) #added 4/22/18 mod1 <- matrix(av.r,n,n) Res1 <- R - mod1 GF1 = 1- sum(Res1^2)/sum(R^2) Rd <- R - diag(R) diag(Res1) <- 0 GF1.off <- 1 - sum(Res1^2)/sum(Rd^2) sn <- n*av.r/(1-av.r) # Q = (2 * n^2/((n-1)^2*(sum(C)^3))) * (sum(C) * (tr(C^2) + (tr(C))^2) - 2*(tr(C) * sum(C^2))) #corrected 1/15/16 Q = (2 * n^2/((n - 1)^2 * (sum(C)^3))) * (sum(C) * (tr(C%*%C) + (tr(C))^2) - 2 * (tr(C) * sum(C%*%C))) #correction from Tamaki Hattori result <- list(raw=alpha.raw,std=alpha.std,G6=G6,av.r=av.r,sn=sn,Q=Q,GF1,GF1.off,var.r = var.r,med.r=med.r) return(result) } #begin main function cl <- match.call() if(!is.matrix(x) && !is.data.frame(x)) stop('Data must either be a data frame or a matrix') if(!inherits(x[1], "data.frame")) x <- fix.dplyr(x) #to get around a problem created by dplyr if(!is.null(keys)){# 4 cases 1 it is a list, 2 is a vector of character, 3 it is keys matrix 4 it is a list of items to reverse if( is.list(keys)) { select <- sub("-","",unlist(keys)) #added 9/26/16 to speed up scoring one scale from many x <- x[,select] keys <- make.keys(x,keys)} else {if(!is.numeric(keys)){ temp <- rep(1,ncol(x)) temp[(colnames(x) %in% keys)] <- -1 #note that if the keys vect has negatives, they will not be reversed, but rather the positive ones will, net result is the same, but item correlations are backward keys <- temp }} } if(missing(impute) | (!is.null(impute) && (impute =="none"))) impute <- NULL nvar <- dim(x)[2] nsub <- dim(x)[1] scores <- NULL response.freq <- NULL raw <- FALSE if (!isCovariance(x)) { #find the correlations if we are given raw data raw <- TRUE if(!is.null(impute) ) { if(impute=="median") {item.impute <- apply(x,2,median,na.rm=na.rm)} else {item.impute <- apply(x,2,mean,na.rm=na.rm) } #column values find.na <- which(is.na(x),arr.ind=TRUE) if(NROW(find.na) >0) x[find.na] <- item.impute[find.na[,2]] #added 08/28/20 to fix a problem where we putting in imputed values for all the variables } item.var <- apply(x,2,sd,na.rm=na.rm) bad <- which((item.var <= 0)|is.na(item.var)) n.bad <-length(bad) if((n.bad > 0) && delete) { for (baddy in 1:n.bad) {warning( "Item = ",colnames(x)[bad][baddy], " had no variance and was deleted but still is counted in the score")} first.bad <- x[1,bad] x <- x[,-bad] nvar <- nvar - n.bad } if(discrete) {response.freq <- response.frequencies(x,max=max) } else {response.freq=NULL} #added 12/4/23 C <- cov(x,use=use) R <- cor(x,use=use)} else {C <- x R <- cov2cor(C)} #to find average R, we need to find R, not C cov2cor(C) != R ! added 1/15/24 if(is.null(colnames(x))) colnames(x) <- paste0("V",1:nvar) #flip items if needed and wanted #if(check.keys && is.null(keys)) { p1 <- principal(x,scores=FALSE) if(any(p1$loadings < 0)) {if (check.keys) {if(warnings) warning("Some items were negatively correlated with the first principal component and were automatically reversed.\n This is indicated by a negative sign for the variable name.") keys <- 1- 2* (p1$loadings < 0) } else { if(is.null(keys) && warnings ) {warning("Some items were negatively correlated with the first principal component and probably \nshould be reversed. \nTo do this, run the function again with the 'check.keys=TRUE' option") if(warnings) cat("Some items (",rownames(p1$loadings)[(p1$loadings < 0)],") were negatively correlated with the first principal component and \nprobably should be reversed. \nTo do this, run the function again with the 'check.keys=TRUE' option") keys <- rep(1,nvar) } } } #keys is now a vector of 1s and -1s #names(keys) <- colnames(x) # } if(is.null(keys)) {keys <- rep(1,nvar) names(keys) <- colnames(x)} else { keys<- as.vector(keys) names(keys) <- colnames(x) if(length(keys) < nvar) {temp <- keys #this is the option of keying just the reversals keys <- rep(1,nvar) names(keys) <- colnames(x) keys[temp] <- -1 } } key.d <- diag(keys) C <- key.d %*% C %*% key.d signkey <- strtrim(keys,1) signkey[signkey=="1"] <- "" colnames(x) <- paste(colnames(x),signkey,sep="") if (raw) { #raw data if(any(keys < 0 )) { min.item <- min(x,na.rm=na.rm) max.item <- max(x,na.rm=na.rm) adjust <- max.item + min.item flip.these <- which(keys < 0 ) x[,flip.these] <- adjust - x[,flip.these] #note that we do not reverse key bad items } if(n.bad > 0 ){sum.bad <- sum(first.bad, na.rm=TRUE) if(cumulative) {total <- rowSums(x,na.rm=na.rm) + sum.bad } else {total <- (nvar * rowMeans(x,na.rm=na.rm) + sum.bad) /(nvar+n.bad)}} else { if(cumulative) {total <- rowSums(x,na.rm=na.rm) } else {total <- rowMeans(x,na.rm=na.rm)} } mean.t <- mean(total,na.rm=na.rm) sdev <- sd(total,na.rm=na.rm) raw.r <- cor(total,x,use=use) t.valid <- colSums(!is.na(x))} else { #we are working with a correlation matrix total <- NULL totals <- TRUE } # R <- cov2cor(C) #need to actually find correlations to treat missing values correctly drop.item <- vector("list",nvar) alpha.total <- alpha.1(C,R) if(nvar > 2) { for (i in 1:nvar) { drop.item[[i]] <- alpha.1(C[-i,-i,drop=FALSE],R[-i,-i,drop=FALSE]) } } else {drop.item[[1]]<- c(C[1,2]/C[2,2],R[1,2],smc(R)[1],R[1,2],R[1,2]/(1-R[1,2]),NA,NA,NA,0,R[1,2]) drop.item[[2]] <- c(C[1,2]/C[1,1],R[1,2],smc(R)[1],R[1,2],R[1,2]/(1-R[1,2]),NA,NA,NA,0,R[1,2]) } #added the extra 2 NA June 18, 2017 -- and further fixed May 7, 2020 by.item <- data.frame(matrix(unlist(drop.item),ncol=10,byrow=TRUE)) #allows us to specify the number of subjects for correlation matrices if(max(nsub,n.obs) > nvar) {by.item[6] <- sqrt(by.item[6]/(max(nsub,n.obs)) ) by.item <- by.item[-c(7:8)] colnames(by.item) <- c("raw_alpha","std.alpha","G6(smc)","average_r","S/N","alpha se","var.r","med.r") } else { by.item <- by.item[-c(6:8)] colnames(by.item) <- c("raw_alpha","std.alpha","G6(smc)","average_r","S/N","var.r","med.r") } rownames(by.item) <- colnames(x) Vt <- sum(R) item.r <- colSums(R)/sqrt(Vt) #this is standardized r #correct for item overlap by using smc RC <-R diag(RC) <-smc(R) Vtc <- sum(RC) item.rc <-colSums(RC)/sqrt(Vtc) #yet one more way to correct is to correlate item with rest of scale if(nvar > 1) { r.drop <- rep(0,nvar) for (i in 1:nvar) { v.drop <- sum(C[-i,-i,drop=FALSE]) c.drop <- sum(C[,i]) - C[i,i] r.drop[i] <- c.drop/sqrt(C[i,i]*v.drop) } } # item.means <- colMeans(x, na.rm=na.rm ) item.sd <- apply(x,2,sd,na.rm=na.rm) if(raw) { Unidim <- alpha.total[7] var.r <- alpha.total[[9]] Fit.off <- alpha.total[8] ase = sqrt(alpha.total$Q/nsub) #alpha.total <- data.frame(alpha.total[1:5],ase=ase,mean=mean.t,sd=sdev) # colnames(alpha.total) <- c("raw_alpha","std.alpha","G6(smc)","average_r","S/N","ase","mean","sd") alpha.total <- data.frame(alpha.total[1:5],ase=ase,mean=mean.t,sd=sdev,med.r =alpha.total[10]) colnames(alpha.total) <- c("raw_alpha","std.alpha","G6(smc)","average_r","S/N","ase","mean","sd","median_r") rownames(alpha.total) <- "" stats <- data.frame(n=t.valid,raw.r=t(raw.r),std.r =item.r,r.cor = item.rc,r.drop = r.drop,mean=item.means,sd=item.sd) } else { if(is.null(n.obs)) { Unidim <- alpha.total[7] Fit.off <- alpha.total[8] var.r <- alpha.total[9] med.r <- alpha.total[10] alpha.total <- data.frame(alpha.total[c(1:5,10)]) #fixed 27/7/14 colnames(alpha.total) <- c("raw_alpha","std.alpha","G6(smc)" ,"average_r","S/N","median_r") } else { Unidim <- alpha.total[7] Fit.off <- alpha.total[8] var.r <- alpha.total[9] alpha.total <- data.frame(alpha.total[1:5],ase=sqrt(alpha.total$Q/n.obs),alpha.total[10]) colnames(alpha.total) <- c("raw_alpha","std.alpha","G6(smc)" ,"average_r","S/N","ase","median_r")} rownames(alpha.total) <- "" stats <- data.frame(r =item.r,r.cor = item.rc,r.drop = r.drop) #added r.drop 10/12/13 } rownames(stats) <- colnames(x) #added measures of unidimensionality Feb 24, 2016 #found in alpha.1 # the basic idea is how big are the residuals given the model #we can compare them to the total R or the off diagonal R. #end of unidimensionality statistics if(n.iter > 1) {#do a bootstrap confidence interval for alpha # if(!require(parallel)) {message("The parallel package needs to be installed to run mclapply")} if(!raw) {message("bootstrapped confidence intervals require raw data") boot <- NULL boot.ci <- NULL } else { boot <- vector("list",n.iter) boot <- mclapply(1:n.iter,function(XX) { xi <- x[sample.int(nsub,replace=TRUE),] C <- cov(xi,use="pairwise") if(!is.null(keys)) {key.d <- diag(keys) xi <- key.d %*% C %*% key.d} R <- cov2cor(C) alpha.1(C,R) }) #end of mclapply boot <- matrix(unlist(boot),ncol=10,byrow=TRUE) colnames(boot) <- c("raw_alpha","std.alpha","G6(smc)","average_r","s/n","ase","Unidim","Goodfit","var.r","median.r") boot.ci <- quantile(boot[,1],c(.025,.5,.975)) }} else {boot=NULL boot.ci <- NULL} names(Unidim) <- "Unidim" names(Fit.off) <- "Fit.off" feldt <- alpha.ci(alpha.total[1],nsub,nvar) keys <- keys2list(as.matrix(keys)) result <- list(total=alpha.total,alpha.drop=by.item,item.stats=stats,response.freq=response.freq,keys=keys,scores = total,nvar=nvar,boot.ci=boot.ci,boot=boot,feldt=feldt,Unidim=Unidim,var.r=var.r,Fit=Fit.off,call=cl,title=title) class(result) <- c("psych","alpha") return(result) } #modified Sept 8, 2010 to add r.drop feature #modified October 12, 2011 to add apply to the sd function #modified November 2, 2010 to use sd instead of SD #January 30, 2011 - added the max category parameter (max) #June 20, 2011 -- revised to add the check.keys option as suggested by Jeremy Miles #Oct 3, 2013 check for variables with no variance and drop them with a warning #May 27, 2022 convert the keys output to a named list so that we can use if in other functions #November 22, 2013 Added the standard error as suggested by #modified December 6, 2013 to add empirical confidence estimates #modified January 9, 2014 to add multicore capabilities to the bootstrap #corrected December 18 to allow reverse keying for correlation matrices as well as raw data #modified 1/16/14 to add S/N to summary stats #added item.c (raw correlation) 1/10/15 #corrected 1/16/16 corrected the formula for Q following a suggestion by Tamaki Hattori #added the n.obs option to allow us to find standard errors even from correlation matrices #7/4/23 changed the reference to total score to be first principal component #a kludge to get around a problem introduced by dplyr which changes the class structure of data frames. #created in response to a problem raised by Adam Liter (February, 2017) #probably not necessary anymore if we use inherits(x,"data.frame") "fix.dplyr" <- function (object) { if(is.data.frame(object)) { cn <- class(object) df <- which(cn=="data.frame") cn.not <- cn[-df] cn <- c("data.frame",cn.not) class(object) <- cn } invisible(object) } #apply the Duhacheck and Iacobucci estimates #compare with Feldt's estimate "alpha.ci" <- function(alpha,n.obs,n.var=NULL,p.val=.05,digits=2) { # Q = (2 * n^2/((n - 1)^2 * (sum(C)^3))) * (sum(C) * (tr(C%*%C) + (tr(C))^2) - 2 * (tr(C) * sum(C%*%C))) #correction from Tamaki Hattori CI.high <- 1- (1-alpha)* qf(p.val/2,df1=n.obs-1,df2=(n.obs-1)*(n.var-1)) CI.low <- 1- (1-alpha)* qf(1-p.val/2,df1=n.obs-1,df2=(n.obs-1)*(n.var-1)) #Inf changed inf to nvar -1 3/3/22 #CI.high <- 1- (1-alpha)* qf(p.val/2,df1=n.obs-1,df2=Inf) #CI.low <- 1- (1-alpha)* qf(1-p.val/2,df1=n.obs-1,df2=Inf) if(!is.null(n.var)) {r.bar <- alpha/(n.var - alpha*(n.var-1)) } else {r.bar=NA} result <- list(lower.ci =CI.low,alpha=alpha,upper.ci=CI.high,r.bar=r.bar) #print(result,digits=digits) class(result) <- c("psych","alpha.ci") invisible(result) } #convert alpha the the average r alpha2r <- function(alpha,n.var) { r.bar <- alpha/(n.var - alpha*(n.var-1)) r.bar}psych/R/parcels.R0000644000176200001440000001251212454611122013354 0ustar liggesusers"sim.parcels" <- function(x,n.samp=100,reps=10,delta=.5,full=FALSE,congruence=FALSE,max=TRUE) { result <- matrix(NA,nrow=reps,ncol=8) n.obs <- nrow(x) nvar <- ncol(x) for (i in 1:reps){ samp <- sample(n.obs,n.samp) r <- cor(x[samp,]) #The normal factor case f2 <- fa(r,2,rotate="geominQ",delta=delta) clust <- factor2cluster(f2) count <- sum(clust[1:(nvar/2),1]) + sum(clust[(nvar/2+1):nvar,2]) #the pairwise parcel case if(full) { keys <- parcels(r,2,max=max,congruence=congruence) keys <- keysort(keys) } else { keys1 <- parcels(r[1:(nvar/2),1:(nvar/2)],2,max=max,congruence=congruence) keys2 <- parcels(r[(nvar/2+1):nvar,(nvar/2+1):nvar],2,max=max,congruence=congruence) keys <- super.matrix(keys1,keys2) } x.p <- score.items(keys,x[samp,]) r2 <- cor(x.p$scores) f2.p <- fa(r2,2,rotate="geominQ",delta=delta) clust <- factor2cluster(f2.p) nrclust <- nrow(clust) count2 <- sum(clust[1:(nrclust/2),1]) + sum(clust[(nrclust/2+1):nrclust,2]) Roe <- cor(x.p$scores,x[samp,]) fe2 <- fa.extension(Roe,f2.p) clustex <- factor2cluster(fe2) nrclustex <- nrow(clustex) count2e <- sum(clustex[1:(nrclustex/2),1]) + sum(clustex[(nrclustex/2+1):nrclustex,2]) #the tri parcel solution if(full) { keys <- parcels(r,3,max=max,congruence=congruence) keys <- keysort(keys) } else { keys3.1 <- parcels(r[1:(nvar/2),1:(nvar/2)],3,max=max,congruence=congruence) keys3.2 <- parcels(r[(nvar/2+1):nvar,(nvar/2+1):nvar],3,max=max,congruence=congruence) keys <- super.matrix(keys3.1,keys3.2) } x.p3 <- scoreItems(keys,x[samp,],) f2.p3 <- fa(x.p3$scores,2,rotate="geominQ",delta=delta) clust3 <- factor2cluster(f2.p3) nrclust3 <- nrow(clust3) count3 <- sum(clust3[1:(nrclust3/2),1]) + sum(clust3[(nrclust3/2+1):nrclust3,2]) Roe <- cor(x.p3$scores,x[samp,]) fe3 <- fa.extension(Roe,f2.p3) clustex3 <- factor2cluster(fe3) nrclustex3 <- nrow(clustex3) count3e <- sum(clustex3[1:(nrclustex3/2),1]) + sum(clustex3[(nrclustex3/2+1):nrclustex3,2]) result[i,1] <- f2$Phi[1,2] result[i,2] <- f2.p$Phi[1,2] result[i,3] <- f2.p3$Phi[1,2] result[i,4] <- min(count,24-count) result[i,5] <- min(count2,nrclust-count2) result[i,6] <- min(count3,nrclust3-count3) result[i,7] <- min(count2e,nrclustex-count2e) result[i,8] <- min(count3e,nrclustex3-count3e) } colnames(result) <- c("fa Phi","P2 Phi","P3 Phi","error","error2","error3","error2e","error3e") return(result) } "keysort" <- function(keys) { items <- 1:nrow(keys) weights <- items %*% abs(keys) ord <- order(weights) keys[] <- keys[,ord] } "parcels" <- function (x,size=3,max=TRUE,flip=TRUE,congruence = FALSE) { if(nrow(x) !=ncol(x)) {x <- cor(x,use="pairwise")} if(congruence) { x <- factor.congruence(x,x) } if(max) {diag(x) <- 0 nvar <- nrow(x) row.range <- apply(x,1,range,na.rm=TRUE) row.max <- pmax(abs(row.range[1,]),abs(row.range[2,])) #find the largest absolute similarity diag(x) <- row.max sd.inv <- diag(1/sqrt(row.max)) similar <- sd.inv %*% x %*% sd.inv } if (size==2) {key <- parcels2(x,flip=flip)} else {key <- parcels3(x,flip=flip)} rownames(key) <- colnames(x) colnames(key) <- paste("P",1:ncol(key),sep="") return(key)} #this just works for parcels of size 2 "parcels2" <- function(x,size=2,flip) { nvar <- nrow(x) key <- matrix(0,nvar,nvar/size) similar <- x similar <- similar * lower.tri(similar) for(i in 1:(nvar/size)) { max.cell <- which.max(abs(similar)) #global maximum max.col <- trunc(max.cell/nrow(similar))+1 #is in which row and column? max.row <- max.cell - (max.col-1)*nrow(similar) #need to fix the case of first column if (max.row < 1) {max.row <- nrow(similar) max.col <- max.col-1 } key[max.col,i] <- 1 if(flip && (similar[max.row,max.col] < 0)) {key[max.row,i] <- -1} else {key[max.row,i] <- 1} similar[max.row,] <- similar[,max.row] <- NA similar[,max.col] <- similar[max.col,] <- NA } return(key) } "parcels3" <- function(x,flip) { nvar <- nrow(x) nkeys <- floor(nvar/3) keys <- matrix(0,nvar,nkeys) pointers <- 1:nvar point <- pointers for(i in 1:nkeys) { best <- trimax(abs(x[point,point]))[1:3] items <- point[best] keys[items,i] <- 1 if(flip) {if(x[items[1],items[2]] < 0 ) keys[items[2],i] <- -1 if(x[items[1],items[ 3]] < 0 ) keys[items[3],i] <- -1 #if(x[items[2],items[ 3]] < 0 ) keys[items[3],i] <- -keys[items[3],i] } point <- point[-best] } keys } "trimax" <- function(x) { nvar <- nrow(x) cs1 <- cumsum(1:nvar) cs2 <- cumsum(cs1) simil <- rep(NA,length=nvar*(nvar-1)* (nvar-2)/6) ind=1 for(i in 3:nvar) { for (j in 2:(i-1)) { for (k in 1:(j-1)) {simil[ind] <- x[i,j] + x[i,k] + x[j,k] ind <- ind+1} } } maxi <- which.max(simil) if(maxi ==1) { m1 <- 3 m2 <- 2 m3 <- 1 } else { m1 <- min(which(cs2 >= maxi)) +2 maxi2 <- (maxi - cs2[m1-3]) if( maxi2 < 2) {m2 <- 2 m3 <- 1} else { m2 <- min(which(cs1 >= maxi2))+1 if((maxi2- cs1[m2-2]) > 0 ) { m3 <- maxi2 - cs1[m2-2] } else {m3 <- m2 -1} }} results <- c(m1,m2,m3,maxi) #maxi is included for debugging return(results) } #Testing parcels if(FALSE) { ind=1 nvar<- 24 cs <- cumsum(cumsum(1:nvar)) print(cs) for(i in 3:nvar) { for (j in 2:(i-1)) { for (k in 1:(j-1)) { m1 <- min(which(cs >= ind)) +2 newind <- ind - cs[m1-3] m2 <- min(which(cs > newind)) +1 cat(ind, i,j,k , "m1 = ",m1,"newind = ",newind, "m2=",m2,", \n") ind <- ind+1} } } } psych/R/principal.R0000744000176200001440000003741614345725446013737 0ustar liggesusers#pca is just an alias for the principal function "pca" <- function(r,nfactors=1,residuals=FALSE,rotate="varimax",n.obs = NA, covar=FALSE,scores=TRUE,missing=FALSE,impute="median",oblique.scores=TRUE,method="regression",use="pairwise",cor="cor",correct=.5,weight=NULL,...) { principal(r=r,nfactors=nfactors,residuals=residuals,rotate=rotate,n.obs=n.obs,covar=covar,scores=scores, missing=missing, impute=impute,oblique.scores=oblique.scores, method=method,use=use,cor=cor,correct=.5,weight=NULL,...) } #to make principal components more similar in output to normal pca in other systems. "principal" <- function(r,nfactors=1,residuals=FALSE,rotate="varimax",n.obs = NA, covar=FALSE,scores=TRUE,missing=FALSE,impute="median",oblique.scores=TRUE,method="regression",use="pairwise",cor="cor",correct=.5,weight=NULL,...) { cl <- match.call() n <- dim(r)[2] if (!isCorrelation(r) && (!isCovariance(r))){ #added (isCovariance) April 9, 2019 to handle the case of a covariance matrix raw <- TRUE n.obs <- dim(r)[1] if(scores) {x.matrix <- as.matrix(r) #matrices are required for the substitution to work if(missing) { miss <- which(is.na(x.matrix),arr.ind=TRUE) if(impute=="mean") { item.means <- colMeans(x.matrix,na.rm=TRUE) #replace missing values with means x.matrix[miss]<- item.means[miss[,2]]} else { item.med <- apply(x.matrix,2,median,na.rm=TRUE) #replace missing with medians x.matrix[miss]<- item.med[miss[,2]]} }} # 2011.12.21 added the covar option switch(cor, cor = {if(!is.null(weight)) {r <- cor.wt(r,w=weight)$r} else { r <- cor(r,use=use)} }, cov = {r <- cov(r,use=use) covar <- TRUE}, wtd = { r <- cor.wt(r,w=weight)$r}, spearman = {r <- cor(r,use=use,method="spearman")}, kendall = {r <- cor(r,use=use,method="kendall")}, tet = {r <- tetrachoric(r,correct=correct,weight=weight)$rho}, poly = {r <- polychoric(r,correct=correct,weight=weight)$rho}, tetrachoric = {r <- tetrachoric(r,correct=correct,weight=weight)$rho}, polychoric = {r <- polychoric(r,correct=correct,weight=weight)$rho}, mixed = {r <- mixedCor(r,use=use,correct=correct)$rho}, Yuleb = {r <- YuleCor(r,,bonett=TRUE)$rho}, YuleQ = {r <- YuleCor(r,1)$rho}, YuleY = {r <- YuleCor(r,.5)$rho } ) # if(!covar) {r <- cor(r,use="pairwise")} else r <- cov(r,use="pairwise") # if given a rectangular matrix, then find the correlations or covariances first } else { raw <- FALSE if(!is.matrix(r)) { r <- as.matrix(r)} sds <- sqrt(diag(r)) #convert covariance matrices to correlation matrices if(!covar) r <- r/(sds %o% sds) } #added June 9, 2008 if (!residuals) { result <- list(values=c(rep(0,n)),rotation=rotate,n.obs=n.obs,communality=c(rep(0,n)),loadings=matrix(rep(0,n*n),ncol=n),fit=0,fit.off=0)} else { result <- list(values=c(rep(0,n)),rotation=rotate,n.obs=n.obs,communality=c(rep(0,n)),loadings=matrix(rep(0,n*n),ncol=n),residual=matrix(rep(0,n*n),ncol=n),fit=0,fit.off=0)} #added 24/4/15 to stop with bad data and give more meaningful help if(any(is.na(r))) { bad <- TRUE tempr <-r wcl <-NULL while(bad) { wc <- table(which(is.na(tempr), arr.ind=TRUE)) #find the correlations that are NA wcl <- c(wcl,as.numeric(names(which(wc==max(wc))))) tempr <- r[-wcl,-wcl] if(any(is.na(tempr))) {bad <- TRUE} else {bad <- FALSE} } cat('\nLikely variables with missing values are ',colnames(r)[wcl],' \n') stop("I am sorry: missing values (NAs) in the correlation matrix do not allow me to continue.\nPlease drop those variables and try again." ) } eigens <- eigen(r) #call the eigen value decomposition routine result$values <- eigens$values eigens$values[ eigens$values < .Machine$double.eps] <- .Machine$double.eps #added May 14, 2009 to fix case of singular matrices loadings <- eigens$vectors %*% sqrt(diag(eigens$values,nrow=length(eigens$values))) #added May 2, 2016 for the weird case of a single variable with covariance > 1 if(nfactors > 0) {loadings <- loadings[,1:nfactors]} else {nfactors <- n} if (nfactors > 1) {communalities <- rowSums(loadings^2)} else {communalities <- loadings^2 } uniquenesses <- diag(r) - communalities # 2011.12.21 uniqueness is now found if covar is true names(communalities) <- colnames(r) # 2009.02.10 Make sure this is a named vector -- correction by Gumundur Arnkelsson #added January 19, 2009 to flip based upon colSums of loadings if (nfactors > 1) {sign.tot <- vector(mode="numeric",length=nfactors) sign.tot <- sign(colSums(loadings)) sign.tot[sign.tot==0] <- 1 loadings <- loadings %*% diag(sign.tot) } else { if (sum(loadings) < 0) {loadings <- -as.matrix(loadings)} else {loadings <- as.matrix(loadings)} colnames(loadings) <- "PC1" } colnames(loadings) <- paste("PC",1:nfactors,sep='') rownames(loadings) <- rownames(r) Phi <- NULL rot.mat <- NULL if(rotate != "none") {if (nfactors > 1) { if (rotate=="varimax" |rotate=="Varimax" | rotate=="quartimax" | rotate =="bentlerT" | rotate =="geominT" | rotate =="targetT" | rotate =="bifactor" | rotate =="TargetT"| rotate =="equamax"| rotate =="varimin"|rotate =="specialT" | rotate =="Promax" | rotate =="promax"| rotate =="cluster" |rotate == "biquartimin" |rotate =="specialQ" ) { Phi <- NULL colnames(loadings) <- paste("RC",1:nfactors,sep='') #for rotated component switch(rotate, #The orthogonal cases for GPArotation + ones developed for psych varimax = {rotated <- stats::varimax(loadings,...) #varimax is from stats, the others are from GPArotation loadings <- rotated$loadings rot.mat <- rotated$rotmat}, Varimax = {if (!requireNamespace('GPArotation')) {stop("I am sorry, to do this rotation requires the GPArotation package to be installed")} #varimax is from the stats package, Varimax is from GPArotations #rotated <- do.call(rotate,list(loadings,...)) #rotated <- do.call(getFromNamespace(rotate,'GPArotation'),list(loadings,...)) rotated <- GPArotation::Varimax(loadings,...) loadings <- rotated$loadings rot.mat <- t(solve(rotated$Th))} , quartimax = {if (!requireNamespace('GPArotation')) {stop("I am sorry, to do this rotation requires the GPArotation package to be installed")} #rotated <- do.call(rotate,list(loadings)) rotated <- GPArotation::quartimax(loadings,...) loadings <- rotated$loadings rot.mat <- t(solve(rotated$Th))} , bentlerT = {if (!requireNamespace('GPArotation')) {stop("I am sorry, to do this rotation requires the GPArotation package to be installed")} #rotated <- do.call(rotate,list(loadings,...)) rotated <- GPArotation::bentlerT(loadings,...) loadings <- rotated$loadings rot.mat <- t(solve(rotated$Th))} , geominT = {if (!requireNamespace('GPArotation')) {stop("I am sorry, to do this rotation requires the GPArotation package to be installed")} #rotated <- do.call(rotate,list(loadings,...)) rotated <- GPArotation::geominT(loadings,...) loadings <- rotated$loadings rot.mat <- t(solve(rotated$Th))} , targetT = {if (!requireNamespace('GPArotation')) {stop("I am sorry, to do this rotation requires the GPArotation package to be installed")} rotated <- GPArotation::targetT(loadings,Tmat=diag(ncol(loadings)),...) loadings <- rotated$loadings rot.mat <- t(solve(rotated$Th))} , bifactor = {rot <- bifactor(loadings,...) loadings <- rot$loadings rot.mat <- t(solve(rot$Th))}, TargetT = {if (!requireNamespace('GPArotation')) {stop("I am sorry, to do this rotation requires the GPArotation package to be installed")} rot <- GPArotation::targetT(loadings,Tmat=diag(ncol(loadings)),...) loadings <- rot$loadings rot.mat <- t(solve(rot$Th))}, equamax = {rot <- equamax(loadings,...) loadings <- rot$loadings rot.mat <- t(solve(rot$Th))}, varimin = {rot <- varimin(loadings,...) loadings <- rot$loadings rot.mat <- t(solve(rot$Th))}, specialT = {rot <- specialT(loadings,...) loadings <- rot$loadings rot.mat <- t(solve(rot$Th))}, Promax = {pro <- Promax(loadings,...) loadings <- pro$loadings Phi <- pro$Phi rot.mat <- pro$rotmat}, promax = {pro <- stats::promax(loadings,...) #from stats loadings <- pro$loadings rot.mat <- pro$rotmat ui <- solve(rot.mat) Phi <- cov2cor(ui %*% t(ui))}, cluster = {loadings <- varimax(loadings,...)$loadings pro <- target.rot(loadings) loadings <- pro$loadings Phi <- pro$Phi rot.mat <- pro$rotmat}, biquartimin = {ob <- biquartimin(loadings,...) loadings <- ob$loadings Phi <- ob$Phi rot.mat <- t(solve(ob$Th))}, # TargetQ = {ob <- TargetQ(loadings,...) # loadings <- ob$loadings # Phi <- ob$Phi # rot.mat <- t(solve(ob$Th))}, specialQ = {ob <- specialQ(loadings,...) loadings <- ob$loadings Phi <- ob$Phi rot.mat <- t(solve(pro$Th))}) } else { colnames(loadings) <- paste("TC",1:nfactors,sep='') #for transformed components #The following oblique cases all use GPArotation if (rotate =="oblimin"| rotate=="quartimin" | rotate== "simplimax" | rotate =="geominQ" | rotate =="bentlerQ" |rotate == "targetQ" ) { if (!requireNamespace('GPArotation')) {warning("I am sorry, to do these rotations requires the GPArotation package to be installed") Phi <- NULL} else { ob <- try(do.call(getFromNamespace(rotate,'GPArotation'),list(loadings,...))) if(inherits(ob, as.character("try-error"))) {warning("The requested transformaton failed, Promax was used instead as an oblique transformation") ob <- Promax(loadings)} loadings <- ob$loadings Phi <- ob$Phi rot.mat <- t(solve(ob$Th))} } else {message("Specified rotation not found, rotate='none' used") colnames(loadings) <- paste("PC",1:nfactors,sep='') } #not rotated } } } #just in case the rotation changes the order of the components, sort them by size of eigen value if(nfactors >1) { ev.rotated <- diag(t(loadings) %*% loadings) ev.order <- order(ev.rotated,decreasing=TRUE) loadings <- loadings[,ev.order]} if(!is.null(Phi)) {Phi <- Phi[ev.order,ev.order] } #January 20, 2009 but, then, we also need to change the order of the rotation matrix! signed <- sign(colSums(loadings)) c.names <- colnames(loadings) signed[signed==0] <- 1 loadings <- loadings %*% diag(signed) #flips factors to be in positive direction but loses the colnames colnames(loadings) <- c.names if(!is.null(Phi)) {Phi <- diag(signed) %*% Phi %*% diag(signed) colnames(Phi) <- rownames(Phi) <- c.names} class(loadings) <- "loadings" #Find the summary statistics of Variance accounted for #normally just found in the print function (added 4/22/17) #from the print function if(is.null(Phi)) {if(nfactors > 1) {vx <- colSums(loadings^2) } else {vx <- sum(loadings^2) }} else {vx <- diag(Phi %*% t(loadings) %*% loadings) } vtotal <- sum(communalities + uniquenesses) names(vx) <- colnames(loadings) varex <- rbind("SS loadings" = vx) varex <- rbind(varex, "Proportion Var" = vx/vtotal) if (nfactors > 1) { varex <- rbind(varex, "Cumulative Var"= cumsum(vx/vtotal)) varex <- rbind(varex, "Proportion Explained"= vx/sum(vx)) varex <- rbind(varex, "Cumulative Proportion"= cumsum(vx/sum(vx))) } result$n.obs <- n.obs stats <- factor.stats(r,loadings,Phi,n.obs,fm="pc") class(result) <- c("psych", "principal") result$fn <- "principal" result$loadings <- loadings result$Phi <- Phi result$Call <- cl result$communality <- communalities result$uniquenesses <- uniquenesses result$complexity <- stats$complexity #result$stats <- stats result$valid <- stats$valid #added 12/12/22 result$chi <- stats$chi result$EPVAL <- stats$EPVAL result$R2 <- stats$R2 result$objective <- stats$objective result$residual <- stats$residual result$rms <- stats$rms result$fit <- stats$fit result$fit.off <- stats$fit.off result$factors <- stats$factors result$dof <- stats$dof result$null.dof <- stats$null.dof result$null.model <- stats$null.model result$criteria <- stats$criteria result$STATISTIC <- stats$STATISTIC result$PVAL <- stats$PVAL result$weights <- stats$weights result$r.scores <- stats$r.scores result$rot.mat <- rot.mat result$Vaccounted <-varex if(!is.null(Phi) && oblique.scores) { result$Structure <- loadings %*% Phi} else {result$Structure <- loadings } if(scores && raw) { result$weights <- try(solve(r,result$Structure),silent=TRUE) if(inherits(result$weights, "try-error")) {warning("The matrix is not positive semi-definite, scores found from Structure loadings") result$weights <- result$Structure} # else { result$scores <- scale(x.matrix,scale=!covar) %*% result$weights #} } # result$scores <- factor.scores(scale(x.matrix,scale=!covar),result$Structure,Phi=Phi,method=method,rho=r) # method = method added Nov 20, 2011 # result$weights<- result$scores$weights # result$scores <- result$scores$scores} return(result) } # modified August 10, 2007 # modified Feb 2, 2008 to calculate scores and become a factanal class #Modified June 8,2008 to get chi square values to work or default to statistic if n.obs==NULL. #modified Jan 2, 2009 to report the correlations between oblique factors #modified December 30 to let n.obs ==NA rather than NULL to be compatible with factanal #modified Jan 14, 2009 to change phi to Phi to avoid confusion #modified Jan 19, 2009 to allow for promax rotations #modified May 15, 2009 to allow for singular matrices #correct August 25, 2009 to return result$values #modified August 25 to return result$stats #modified November 20, 2011 to allow multiple scoring approaches to give component scores #modified December 21, 2011 to allow for finding the pc of covariances #modified June 19, 2012 to fix a missing value problem reported by Neil Stewart #modified February 9, 2013 to label the type of rotation/transformation (as is documented but not implemented) #modified February 25, 2013 to make scores=TRUE the default #modified 1/16/14 to output the structure matrix #modified 8/15 to add rot.mat to all rotations #modified 9/3/15 to label rotated PCs as RC and transformed as TC -- perhaps this was dropped out while fixing rotations? #added the pca function as an alias to principal 6/8/16 psych/R/fiml.R0000644000176200001440000001442712252364431012665 0ustar liggesusers#December 12, 2013 #taken almost completely from the matching lavaan functions #which unfortunately, are not public functions #some of the functionality of lavaan has been dropped #The relevant functions from lavaan are #getMissingPatterns #getMissingPatternStats #estimate.moments.fiml #minimize.this.function #first.derivative.param # first.derivative.param.numerical #estimator.FIML #derivative.FIML #vech corFiml <- function (x, covar = FALSE,show=FALSE) { if (!is.matrix(x)) x <- as.matrix(x) Mp <- getMissingPatterns(x) if (length(Mp$empty.idx) > 0L) { x <- x[-Mp$empty.idx, , drop = FALSE] } mpat <- getMissingPatternStats(X = x, Mp = Mp) if(show) {return(Mp$pat) } else { moments <- estimate.moments.fiml(X = x, M = mpat) colnames(moments$sigma) <- rownames(moments$sigma) <- colnames(x) cor <- cov2cor(moments$sigma) if (covar) { return(list(mean = moments$mu, cor = cor, cov = moments$sigma, fx = moments$fx)) } else {return(cor)} } } getMissingPatterns <- function (X) { nobs <- nrow(X) nvar <- ncol(X) MISSING <- 1L * is.na(X) #convert to number coverage <- crossprod(1 - MISSING)/nobs #this next step looks for the missing cases and removes someone with all missing id <- apply(MISSING, MARGIN = 1, function(x) { if (sum(x) == length(x)) { out <- "empty" } else { paste(x, collapse = "") } }) empty.idx <- which(id == "empty") if (length(empty.idx) > 0) { MISSING <- MISSING[-empty.idx, ] X <- X[-empty.idx, ] id <- id[-empty.idx] nobs <- nobs - length(empty.idx) } TABLE <- sort(table(id), decreasing = TRUE) order <- names(TABLE) npatterns <- length(TABLE) pat <- 1L - MISSING[match(order, id), , drop = FALSE] storage.mode(pat) <- "logical" row.names(pat) <- as.character(TABLE) out <- list(nobs = nobs, nvar = nvar, coverage = coverage, id = id, npatterns = npatterns, order = order, pat = pat, empty.idx = empty.idx) out } getMissingPatternStats <- function (X = NULL, Mp = NULL) { npatterns <- Mp$npatterns id <- Mp$id order <- Mp$order pat <- Mp$pat data <- vector("list", length = npatterns) for (p in 1:npatterns) { row.idx <- which(id == order[p]) nobs <- length(row.idx) Xp <- X[row.idx, pat[p, ], drop = FALSE] if (nobs > 1) { M <- colMeans(Xp) S <- crossprod(Xp)/nobs - tcrossprod(M) } else { S <- 0 M <- as.numeric(Xp) } data[[p]] <- list(X = Xp, SX = S, MX = M, nobs = nobs, var.idx = pat[p, ]) } data } estimate.moments.fiml <- function (X = NULL, M = NULL) { nvar <- ncol(X) pstar <- nvar * (nvar + 1)/2 start.cov <- cov(X, use = "p") dimnames(start.cov) <- NULL start.mean <- apply(X, 2, mean, na.rm = TRUE) names(start.mean) <- NULL lower.idx <- which(lower.tri(start.cov, diag = TRUE)) upper.idx <- which(upper.tri(t(start.cov), diag = TRUE)) x2param <- function(x) { mu <- x[1:nvar] sigma.el <- x[-(1:nvar)] sigma <- matrix(0, nvar, nvar) sigma[lower.idx] <- sigma.el sigma[upper.idx] <- t(sigma)[upper.idx] list(mu = mu, sigma = sigma) } minimize.this.function <- function(x) { out <- x2param(x) ev <- eigen(out$sigma)$values if (any(ev < 0)) { return(Inf) } fx <- estimator.FIML(Sigma.hat = out$sigma, Mu.hat = out$mu, M = M) fx } first.derivative.param <- function(x) { out <- x2param(x) dx.out <- derivative.FIML(Sigma.hat = out$sigma, Mu.hat = out$mu, M = M) dx <- c(dx.out$dx.mu, vech(dx.out$dx.Sigma)) dx } start.x <- c(start.mean, vech(start.cov)) iter.max <- 500 optim.out <- nlminb(start = start.x, objective = minimize.this.function, gradient = first.derivative.param, control = list(iter.max = iter.max, eval.max = iter.max * 2, trace = 0)) x <- optim.out$par fx <- optim.out$objective out <- x2param(x) sigma <- out$sigma mu <- out$mu list(sigma = sigma, mu = mu, fx = fx) } estimator.FIML <- function (Sigma.hat = NULL, Mu.hat = NULL, M = NULL, h1 = NULL) { npatterns <- length(M) fx.p <- numeric(npatterns) w.p <- numeric(npatterns) for (p in 1:npatterns) { SX <- M[[p]][["SX"]] MX <- M[[p]][["MX"]] w.p[p] <- nobs <- M[[p]][["nobs"]] var.idx <- M[[p]][["var.idx"]] Sigma.inv <- inv.chol(Sigma.hat[var.idx, var.idx], logdet = TRUE) Sigma.log.det <- attr(Sigma.inv, "logdet") Mu <- Mu.hat[var.idx] TT <- SX + tcrossprod(MX - Mu) trace <- sum(Sigma.inv * TT) fx.p[p] <- Sigma.log.det + trace } fx <- weighted.mean(fx.p, w = w.p) if (!is.null(h1)) { fx <- fx - h1 if (fx < 0) fx <- 0 } fx } inv.chol <- function (S, logdet = FALSE) { cS <- chol(S) S.inv <- chol2inv(cS) if (logdet) { attr(S.inv, "logdet") <- sum(log(diag(cS)^2)) } S.inv } derivative.FIML <- function (Sigma.hat, Mu.hat, M) { ntotal <- sum(sapply(M, "[[", "nobs")) nvar <- length(Mu.hat) npatterns <- length(M) dx.Sigma <- matrix(0, nvar, nvar) dx.Mu <- matrix(0, nvar, 1) for (p in 1:npatterns) { SX <- M[[p]][["SX"]] MX <- M[[p]][["MX"]] nobs <- M[[p]][["nobs"]] var.idx <- M[[p]][["var.idx"]] Sigma.inv <- inv.chol(Sigma.hat[var.idx, var.idx], logdet = FALSE) Mu <- Mu.hat[var.idx] TT <- SX + tcrossprod(MX - Mu) dx.Mu[var.idx, 1] <- (dx.Mu[var.idx, 1] + nobs/ntotal * -2 * t(t(MX - Mu) %*% Sigma.inv)) dx.Sigma[var.idx, var.idx] <- (dx.Sigma[var.idx, var.idx] - nobs/ntotal * 2 * (Sigma.inv %*% (TT - Sigma.hat[var.idx, var.idx]) %*% Sigma.inv)) } diag(dx.Sigma) <- diag(dx.Sigma)/2 out <- list(dx.mu = dx.Mu, dx.Sigma = dx.Sigma) out } vech <- function (S, diagonal = TRUE) { ROW <- row(S) COL <- col(S) if (diagonal) S[ROW >= COL] else S[ROW > COL] } psych/R/errorCircles.r0000644000176200001440000001564413650667673014457 0ustar liggesusers"error.crosses.by" <- function (x,y,z,labels=NULL,main=NULL,xlim=NULL,ylim= NULL,xlab=NULL,ylab=NULL,pos=NULL,offset=1,arrow.len=.2,alpha=.05,sd=FALSE,...) # x and y are data frame or descriptive stats {if(is.null(x$mean)) {x <- describe.by(x,z,mat=TRUE) } xmin <- min(x$mean) xmax <- max(x$mean) if(sd) {max.sex <- max(x$sd,na.rm=TRUE) if(is.null(xlim)) {xlim=c(xmin - max.sex,xmax + max.sex) }} else {max.sex <- max(x$se,na.rm=TRUE)} if(is.null(y$mean)) {y <- describe(y)} ymin <- min(y$mean) ymax <- max(y$mean) if(sd) {max.sey <- max(y$sd,na.rm=TRUE) if(is.null(ylim)) {ylim=c(ymin - max.sey,ymax +max.sey)}} else { max.sey <- max(y$se,na.rm=TRUE) } if(is.null(xlim)) xlim=c(xmin - 2*max.sex,xmax +2*max.sex) if(is.null(ylim)) ylim=c(ymin - 2*max.sey,ymax +2*max.sey) if(is.null(main)) {if(!sd) { main = paste((1-alpha)*100,"% confidence limits",sep="") } else {main= paste("Means and standard deviations")} } if(is.null(xlab)) xlab <- "Group 1" if(is.null(ylab)) ylab <- "Group 2" plot(x$mean,y$mean,xlim=xlim,ylim=ylim,xlab=xlab,ylab=ylab,main=main,...) cix <- qt(1-alpha/2,x$n-1) #modified Sept 11, 2013 ciy <- qt(1-alpha/2,y$n-1) z <- dim(x)[1] if(sd) {x$se <- x$sd y$se <- y$sd cix <- ciy <- rep(1,z) } if (is.null(pos)) {locate <- rep(1,z)} else {locate <- pos} if (is.null(labels)) {labels <- rownames(x)} if (is.null(labels)) {lab <- paste("V",1:z,sep="")} else {lab <-labels} for (i in 1:z) {xcen <- x$mean[i] ycen <- y$mean[i] xse <- x$se[i] yse <- y$se[i] arrows(xcen-cix[i]* xse,ycen,xcen+ cix[i]* xse,ycen,length=arrow.len, angle = 90, code=3,col = par("fg"), lty = NULL, lwd = par("lwd"), xpd = NULL) arrows(xcen,ycen-ciy[i]* yse,xcen,ycen+ ciy[i]*yse,length=arrow.len, angle = 90, code=3,col = par("fg"), lty = NULL, lwd = par("lwd"), xpd = NULL) text(xcen,ycen,labels=lab[i],pos=locate[i],cex=1,offset=offset) #puts in labels for all points } } "ellipse" <- function (x,y,r1,r2,...) { #code adapted from John Fox segments=51 angles <- (0:segments) * 2 * pi/segments unit.circle <- cbind(cos(angles), sin(angles)) xs <- r1 #ys <- e.size * yrange ellipse <- unit.circle ellipse[,1] <- ellipse[,1]*r1 + x ellipse[,2] <- ellipse[,2]*r2+ y #ys? lines(ellipse, ...) return(xs) } "errorCircles" <- function (x,y,data,ydata=NULL,group=NULL,paired=FALSE, labels=NULL,main=NULL,xlim=NULL,ylim= NULL,xlab=NULL,ylab=NULL,add=FALSE,pos=NULL,offset=1,arrow.len=.2,alpha=.05,sd=FALSE,bars=TRUE,circles=TRUE,colors=NULL,col.arrows=NULL,col.text=NULL,circle.size=1,...) { # x and y are data frame or descriptive stats xvar <- x yvar <- y if(is.null(colors)) colors <- "black" if(is.null(col.arrows)) col.arrows <- colors if(is.null(col.text)) col.text <- colors # if((length(xvar) ==1) && (length(yvar)==1) && !is.null(group)) {data <- statsBy(data[,c(group,xvar,yvar)],group=group)} else { if(!is.null(group)) {data <- statsBy(data,group=group)} # } x <- list() if(paired) { x$mean <- t(data$mean[,xvar]) x$sd <- t(data$sd[,xvar]) x$n <- t(data$n[,xvar]) } else { #the normal case x$mean <- data$mean[,xvar] x$sd <- data$sd[,xvar] x$n <- data$n[,xvar]} xmin <- min(x$mean,na.rm=TRUE) xmax <- max(x$mean,na.rm=TRUE) x$se <- x$sd/sqrt(x$n) if(sd) {max.sex <- max(x$sd,na.rm=TRUE) if(is.null(xlim)) {xlim=c(xmin - max.sex,xmax + max.sex) }} else {max.sex <- max(x$se,na.rm=TRUE)} y <- list() if(!is.null(ydata)) { y$mean <- ydata$mean[,yvar] y$sd <- ydata$sd[,yvar] y$n <- ydata$n[,yvar] } else { y$mean <- data$mean[,yvar] y$sd <- data$sd[,yvar] y$n <- data$n[,yvar]} ymin <- min(y$mean,na.rm=TRUE) ymax <- max(y$mean,na.rm=TRUE) y$se <- y$sd/sqrt(y$n) if(sd) {max.sey <- max(y$sd,na.rm=TRUE) if(is.null(ylim)) {ylim=c(ymin - max.sey,ymax +max.sey)}} else { max.sey <- max(y$se,na.rm=TRUE) } if(is.null(xlim)) xlim=c(xmin - 2*max.sex,xmax +2*max.sex) if(is.null(ylim)) ylim=c(ymin - 2*max.sey,ymax +2*max.sey) if(is.null(main)) {if(!sd) { main = paste((1-alpha)*100,"% confidence limits",sep="") } else {main= paste("Means and standard deviations")} } if(paired) {if(is.null(xlab)) xlab <- "Group 1" if(is.null(ylab)) ylab <- "Group 2" } else { if(is.null(xlab)) xlab <- colnames(data$mean)[xvar] if(is.null(ylab)) ylab <- colnames(data$mean)[yvar] } if(add) { if(paired) {points(x$mean,typ="p",col=colors,...) } else {points(x$mean,y$mean,typ="p",col=colors,...)} } else { if(paired) {plot(x$mean,xlim=xlim,ylim=ylim,xlab=xlab,ylab=ylab,main=main,typ="p",col=colors,...) } else {plot(x$mean,y$mean,xlim=xlim,ylim=ylim,xlab=xlab,ylab=ylab,main=main,typ="p",col=colors,...)} } N <-x$n Nmax <- max(N) cix <- qt(1-alpha/2,x$n-1) ciy <- qt(1-alpha/2,y$n-1) if(paired) {z <- nrow(x$mean) } else {z <- length(x$mean)} if(sd) {x$se <- x$sd y$se <- y$sd cix <- ciy <- rep(1,z) } if (is.null(pos)) {locate <- rep(1,z)} else {locate <- pos} if (is.null(labels)) {labels <- rownames(x$mean)} if (is.null(labels)) {lab <- paste("V",1:z,sep="")} else {lab <-labels} if(length(colors) < z) colors <- rep(colors,z) if(length(col.text) < z) col.text <- rep(col.text,z) if(length(col.arrows) < z) col.arrows <- rep(col.arrows,z) for (i in 1:z) { if(paired) { xcen <- x$mean[i,1] ycen <- x$mean[i,2] xse <- x$se[i,1] yse <- x$se[i,2] } else { xcen <- x$mean[i] ycen <- y$mean[i] xse <- x$se[i] yse <- y$se[i]} if(bars) {if(max(x$se,na.rm=TRUE) > 0) arrows(xcen-cix[i]* xse,ycen,xcen+ cix[i]* xse,ycen,length=arrow.len, angle = 90, code=3,col = col.arrows[i], lty = NULL, lwd = par("lwd"), xpd = NULL) if(max(y$se,na.rm=TRUE) >0 ) arrows(xcen,ycen-ciy[i]* yse,xcen,ycen+ ciy[i]*yse,length=arrow.len, angle = 90, code=3,col =col.arrows[i], lty = NULL, lwd = par("lwd"), xpd = NULL) } text(xcen,ycen,labels=lab[i],pos=locate[i],col=col.text[i],offset=offset,...) #puts in labels for all points if(circles) { xrange <- xlim[2] - xlim[1] yrange <- ylim[2] - ylim[1] xscale <-max(x$se,na.rm=TRUE) *circle.size yscale <-max(y$se,na.rm=TRUE) *circle.size ellipse(xcen,ycen,sqrt(xscale*x$n[i]/Nmax),sqrt( yscale*x$n[i]/Nmax),col=col.arrows[i]) } } if(!is.null(group)) return(invisible(data)) } psych/R/score.items.R0000644000176200001440000005510414313144266014170 0ustar liggesusers"score.items" <- function (keys,items,totals=FALSE,ilabels=NULL, missing=TRUE, impute="median",delete=TRUE, min=NULL,max=NULL,digits=2,select=TRUE) { message("score.items has been replaced by scoreItems, please change your call") scoreItems(keys=keys,items=items,totals=totals,ilabels=ilabels,missing=missing,impute=impute,delete=delete,min=min,max=max,digits=digits,select=select) } "scoreItems" <- function (keys,items,totals=FALSE,ilabels=NULL, missing=TRUE, impute="median",delete=TRUE, min=NULL,max=NULL,digits=2,n.obs=NULL,select=TRUE) { cl <- match.call() ## First some housekeeping for various types of input raw.data <- TRUE # if(is.list(keys) & !is.data.frame(keys)) keys <- make.keys(items,keys) #added 9/9/16 and then fixed March 4, following a suggestion by Jeromy Anglim if(is.null(colnames(items)) ) {select <- FALSE #can not select items if they don't have colnames or if the keys don't have rownames colnames(items) <- paste0("V",1:NCOL(items)) } #give the items some names if(is.null(dim(keys)) &(is.null(names(keys)))) {keys <- as.matrix(keys) #the case of unnamed key returned from alpha rownames(keys) <-colnames(items) colnames(keys) <- paste0("Scale1",1:NCOL(keys))} if(is.matrix(keys) & is.null(colnames(keys))) { colnames(keys) <- paste0("Scale",1:NCOL(keys)) if(is.null(rownames(keys))) rownames(keys) <- paste0("V",1:NCOL(items))} #this handles input from GAabbreviate if (select) {if(is.list(keys) & (!is.data.frame(keys))) { # select <- sub("-","",unlist(keys)) #then, replaced with select option, Apri 8, 2017 select <- selectFromKeyslist(colnames(items),keys) select <- select[!duplicated(select)] } else {keys <- keys2list(keys) select <- selectFromKeyslist(colnames(items),keys) select <- select[!duplicated(select)] } # if (!isCorrelation(r)) {r <- cor(r[select],use="pairwise")} else {r <- r[select,select]} #check for bad input -- the Mollycoddle option if(any( !(select %in% colnames(items)) )) { cat("\nVariable names in keys are incorrectly specified. Offending items are ", select[which(!(select %in% colnames(items)))],"\n") stop("I am stopping because of improper input. See above for a list of bad item(s). ")} keys <- make.keys(items[,select],keys)} else {select <- 1:ncol(items) } #modified once again April 6,2017 to allow for selecting items if(is.list(keys)) keys <- make.keys(ncol(items),keys,item.labels=colnames(items)) #added 12/26/21 to handle select=FALSE keys <- as.matrix(keys) #just in case they were not matrices to start with n.keys <- dim(keys)[2] n.items <- dim(keys)[1] abskeys <- abs(keys) keynames <- colnames(keys) num.item <- diag(t(abskeys) %*% abskeys) #how many items in each scale num.ob.item <- num.item #will be adjusted in case of impute = FALSE if (!missing) items <- na.omit(items) n.subjects <- dim(items)[1] if ((dim(items)[1] == dim(items)[2]) && !isCorrelation(items)) {warning("You have an equal number of rows and columns but do not seem to have a correlation matrix. I will treat this as a data matrix.")} # with the exception for the very unusual case of exactly as many items as cases reported by Jeromy Anglim if ((dim(items)[1] == dim(items)[2]) && isCorrelation(items)){ #this is the case of scoring correlation matrices instead of raw data (checking for rare case as well) raw.data <- FALSE totals <- FALSE #because we don't have the raw data, totals would be meaningless items <- items[select,select] #we have a correlation matrix, but we don't want all of it n.subjects <- 0 C <- as.matrix(items) cov.scales <- t(keys) %*% C %*% keys #fast, but does not handle the problem of NA correlations cov.scales2 <- diag(t(abskeys) %*% C^2 %*% abskeys) # this is sum(C^2) for finding ase response.freq <- NULL } else { items <- items[,select] #select just the items that we want to score. This is faster and robust to bad items #check to make sure all items are numeric -- if not, convert them to numeric if possible, flagging the item that we have done so if(!is.matrix(items)) { #does not work for matrices for(i in 1:n.items) { if(!is.numeric(items[[i]] )) { if(is.factor(unlist(items[[i]])) | is.character(unlist(items[[i]]))) { items[[i]] <- as.numeric(items[[i]]) colnames(items)[i] <- paste0(colnames(items)[i],"*") } else {items[[i]] <- NA} } } } items <- as.matrix(items) response.freq <- response.frequencies(items) item.var <- apply(items,2,sd,na.rm=TRUE) bad <- which((item.var==0)|is.na(item.var)) #is this a good idea? #bad <- which(is.na(item.var)) if((length(bad) > 0) && delete) { for (baddy in 1:length(bad)) {warning( "Item= ",colnames(items)[bad][baddy] , " had no variance and was deleted from the data and the keys.")} items <- items[,-bad] keys <- as.matrix(keys[-bad,]) n.items <- n.items - length(bad) abskeys <- abs(keys) colnames(keys) <- keynames } item.means <- colMeans(items,na.rm=TRUE) if (is.null(min)) {min <- min(items,na.rm=TRUE)} if (is.null(max)) {max <- max(items,na.rm=TRUE)} # miss.rep <- rowSums(is.na(items)) miss.rep <- (is.na(items) +0) %*% abs(keys) num.item <- diag(t(abskeys) %*% abskeys) #how many items in each scale num.ob.item <- num.item #will be adjusted in case of impute = FALSE if(impute !="none") { miss <- which(is.na(items),arr.ind=TRUE) if(impute=="mean") { item.means <- colMeans(items,na.rm=TRUE) #replace missing values with means items[miss]<- item.means[miss[,2]]} else { item.med <- apply(items,2,median,na.rm=TRUE) #replace missing with medians items[miss]<- item.med[miss[,2]]} #this only works if items is a matrix scores <- items %*% keys #this actually does all the work but doesn't handle missing values C <- cov(items,use="pairwise") cov.scales <- cov(scores,use="pairwise") #and total scale variance cov.scales2 <- diag(t(abskeys) %*% C^2 %*% abskeys) # sum(C^2) for finding ase } else { #handle the case of missing data without imputation scores <- matrix(NaN,ncol=n.keys,nrow=n.subjects) if(raw.data && totals == TRUE) warning("Specifying totals = TRUE without imputation can lead to serious problems. Are you sure?") #just in case it was not already false #do we want to allow totals anyway? #we could try to parallelize this next loop for (scale in 1:n.keys) { pos.item <- items[,which(keys[,scale] > 0)] neg.item <- items[,which(keys[,scale] < 0)] neg.item <- max + min - neg.item sub.item <- cbind(pos.item,neg.item) if(!totals) {scores[,scale] <- rowMeans(sub.item,na.rm=TRUE)} else {scores[,scale] <- rowSums(sub.item,na.rm=TRUE)} rs <- rowSums(!is.na(sub.item)) num.ob.item[scale] <- mean(rs[rs>0]) #added Sept 15, 2011 # num.ob.item[scale] <- mean(rowSums(!is.na(sub.item))) # dropped } # end of scale loop # we now need to treat the data as if we had done correlations at input C <- cov(items,use="pairwise") cov.scales <- t(keys) %*% C %*% keys cov.scales2 <- diag(t(abskeys) %*% C^2 %*% abskeys) # sum(C^2) for finding ase raw.data <- FALSE } #end of treating missing without imputation } #end of processing raw data slabels <- colnames(keys) if (is.null(slabels)) { if (totals) {slabels<- paste("S",1:n.keys,sep="")} else { slabels <- paste("A",1:n.keys,sep="")} } item.var <- diag(C) #find the item variances var.scales <- diag(cov.scales) cor.scales <- cov2cor(cov.scales) sum.item.var <- item.var %*% abskeys sum.item.var2 <- item.var^2 %*% abskeys item.r <- cov2cor(C) #this does not handle pairwise complete correctly because cov and cor with pairwise work differently #find the median correlation within every scale med.r <- rep(NA, n.keys) for(k in 1:n.keys) { temp <- keys[,k][abs(keys[,k]) > 0] temp.keys <- temp temp <- diag(temp,nrow=length(temp)) # temp <- diag(keys[,k ][abs(keys[,k])>0] ) small.r <- item.r[abs(keys[,k])>0,abs(keys[,k])>0,drop=FALSE] #drop = FALSE for the case of 1 item scales # small.r <- temp %*% small.r %*% temp #this flips, but does not work if any r is na small.r[temp.keys < 0,] <- -small.r[temp.keys< 0,,drop=FALSE] #these two lines flip negatively keyed items even if some are NA (August 25, 2020) small.r[,temp.keys < 0] <- -small.r[,temp.keys< 0,drop=FALSE] med.r[k] <- median(small.r[lower.tri(small.r)],na.rm=TRUE) } names(med.r) <- slabels #but we want to do this for each scale #av.r <- (var.scales - sum.item.var)/(num.item*(num.item-1)) #actually, this the average covar alpha.scale <- (var.scales - sum.item.var)*num.item/((num.item-1)*var.scales) av.r <- alpha.scale/(num.item - alpha.scale*(num.item-1)) #alpha 1 = average r alpha.ob <- av.r * num.ob.item/(1+(num.ob.item-1)* av.r) colnames(alpha.scale) <- slabels alpha.scale[is.nan(alpha.scale)] <- 1 #Find standard errors of alpha following Duhacheck and Iacobbci #Q = (2 * n^2/((n-1)^2*(sum(C)^3))) * (sum(C) * (tr(C^2) + (tr(C))^2) - 2*(tr(C) * sum(C^2))) #this works if we have the raw data Q = (2 * num.item^2/((num.item-1)^2*((var.scales)^3))) * (var.scales * (sum.item.var2 + sum.item.var^2) - 2* sum.item.var * cov.scales2) ase <- NULL #to have something if we don't have raw data #now find the Guttman 6 * reliability estimate as well as the corrected item-whole correlations if(raw.data) { if(length(bad) >0 ) {items <- items[,-bad] # item.var <- item.var[-bad] # C <- C[-bad,-bad] } #this removes those items with no variance from the item statistics item.cor <- cor(items,scores) } else {if (n.keys >1) { item.cor <- C %*% keys %*% diag(1/sqrt(var.scales))/sqrt(item.var)} else {item.cor <- C %*% keys /sqrt(var.scales * item.var)}} colnames(item.cor) <- slabels c.smc <- smc(C,TRUE) diag(C) <- c.smc sum.smc <- c.smc %*% abskeys G6 <- (var.scales - sum.item.var + sum.smc)/var.scales corrected.cov <- t(keys) %*% C %*% keys corrected.var <- diag(corrected.cov) #corrected.var <- diag(t(keys) %*% C %*% keys) if(n.keys>1) { item.rc <- (C %*% keys) %*% sqrt(diag(1/corrected.var))/sqrt(item.var)} else { item.rc <- C %*% keys /sqrt(corrected.var*item.var) } colnames(item.rc) <- slabels #put all of this much later key.var <- diag(t(keys) %*% keys) # key.av.r <- key.alpha/(key.var - key.alpha*(key.var-1)) #alpha 1 = average r scale.size <- outer(key.var,key.var) MIMS <- corrected.cov/scale.size diag(MIMS)<- av.r if(n.subjects > 0) {ase <- sqrt(Q/ n.subjects )} else {if(!is.null(n.obs)) {ase <- sqrt(Q/ n.obs )} else {ase=NULL}} #only meaningful if we have raw data if(is.null(ilabels)) {ilabels <- colnames(items) } if(is.null(ilabels)) {ilabels <- paste("I",1:n.items,sep="")} #if(length(bad) >0 ) {rownames(item.rc) <- ilabels[-bad]} else {rownames(item.rc) <- ilabels} if(raw.data) { correction <- (colSums(abs(keys)-(keys))/2)*(max+min) #correct for flipping scores <- scores + matrix(rep(correction,n.subjects),byrow=TRUE,nrow=n.subjects) if (!totals) { if(n.keys > 1) {scores <- scores %*% diag(1/num.item) #find averages } else {scores <- scores/num.item } } colnames(scores) <- slabels } else {if (impute !="none") scores <- NULL} scale.cor <- correct.cor(cor.scales,t(alpha.scale)) rownames(alpha.scale) <- "alpha" rownames(av.r) <- "average.r" # rownames(med.r) <- "median.r" rownames(G6) <- "Lambda.6" sn <- av.r * num.item/(1-av.r) rownames(sn) <- "Signal/Noise" #find the Multi-Item Multi Trait item x scale correlations # added 9/5/22 MIMT <- matrix(NA,n.keys,n.keys) for (i in 1:(n.keys)) { temp <- keys[,i][abs(keys[,i]) > 0] flip.item <- temp * item.cor[names(temp),,drop=FALSE] if(length(names(temp)) > 1) { MIMT[i,] <- colMeans(flip.item[names(temp),,drop=FALSE])} else {MIMT[i,] <- flip.item} } colnames(MIMT) <- rownames(MIMT) <- colnames(keys) keys.list <- keys2list(keys) #put them into a list to save them if (!raw.data) { if(impute =="none") { #rownames(alpha.ob) <- "alpha.observed" if(!is.null(scores)) colnames(scores) <- slabels #added Sept 23, 2013 results <- list(scores=scores,missing = miss.rep,alpha=alpha.scale, av.r=av.r,sn=sn, n.items = num.item, item.cor = item.cor,cor = cor.scales, corrected = scale.cor,G6=G6,item.corrected = item.rc,response.freq=response.freq,raw=FALSE,alpha.ob = alpha.ob,num.ob.item =num.ob.item,ase=ase,med.r=med.r,keys=keys.list,MIMS=MIMS,MIMT=MIMT,Call=cl)} else { results <- list(alpha=alpha.scale, av.r=av.r,sn=sn, n.items = num.item, item.cor = item.cor,cor = cor.scales ,corrected = scale.cor,G6=G6,item.corrected = item.rc ,response.freq =response.freq,raw=FALSE, ase=ase,med.r=med.r,keys=keys.list,MIMS=MIMS,MIMT=MIMT, all=cl)} } else { if(raw.data) {if (sum(miss.rep) > 0) {results <-list(scores=scores,missing = miss.rep,alpha=alpha.scale, av.r=av.r, sn=sn,n.items = num.item, item.cor = item.cor,cor = cor.scales ,corrected = scale.cor,G6=G6,item.corrected = item.rc,response.freq=response.freq,raw=TRUE,ase=ase,med.r=med.r,keys=keys.list,MIMS=MIMS,MIMT=MIMT,Call=cl)} else{ results <- list(scores=scores,alpha=alpha.scale, av.r=av.r,sn=sn, n.items = num.item, item.cor = item.cor, cor =cor.scales,corrected = scale.cor,G6=G6,item.corrected = item.rc ,response.freq=response.freq,raw=TRUE,ase=ase,med.r=med.r,keys=keys.list,MIMS=MIMS,MIMT=MIMT,Call=cl)} } } class(results) <- c("psych", "score.items") return(results) } #modified June 1 to add row names to items #modified June 22 to add median imputation #modified August 8 to add colnames to scores #modified Sept 23, 2007 to allow for short output #modified December 10, 2007 to default to ilabels as colnames(items) #modified March, 2009 to better#Find scores from correlations for each subject #impute missing values? #The correlations for each subject are found by statsBy #and are stored as a list of matrices in the r element "scoreBy" <- function(keys,stats,correct=TRUE,SMC=TRUE,av.r=TRUE,item.smc=NULL,impute=TRUE,select=TRUE) { #function to score clusters according to the key matrix, correcting for item overlap cl <- match.call() if(!inherits(stats, "statsBy")) stop("Please run statsBy first") if(is.null(stats$r)) stop("I am sorry, but you need to run statsBy with the cors=TRUE option first") MIN.n <- 3 r.list <- stats$r #the call uses the output of statsBy Select <- select keys.list<- keys #we are saving these for each pass ngroups <-length(r.list) tol=sqrt(.Machine$double.eps) #machine accuracy #now, do repeated scoring, one pass for each group result <- list(Call = cl) cor.list <- list() alpha.list <- list() r.list <- list() for(group in 1:ngroups) { num.n <- stats$n[group,] #if(sum(!is.na(num.n)) < 10 || mean(num.n,na.rm=TRUE) < MIN.n){ if(mean(num.n,na.rm=TRUE) < MIN.n){ cor.list[[group]] <- NA alpha.list[[group]] <- NA #add an empty element r.list[[group]] <- NA next() } r <- stats$r[[group]] keys <- keys.list #these are the original keys select <- Select #the original values bad <- FALSE if(is.list(keys) & (!is.data.frame(keys))) { if (select) { select <- selectFromKeyslist(colnames(r),keys) # select <- sub("-","",unlist(keys)) #added April 7, 2017 select <- select[!duplicated(select)] } else {select <- 1:ncol(r) } if (!isCorrelation(r)) {r <- cor(r[,select],use="pairwise")} else {r <- r[select,select]} keys <- make.keys(r,keys)} #added 9/9/16 (and then modified March 4, 2017 if(!is.matrix(keys)) keys <- as.matrix(keys) #keys are sometimes a data frame - must be a matrix #only use correlations #if ((dim(r)[1] != dim(r)[2]) ) {r <- cor(r,use="pairwise")} #if(any(abs(r[!is.na(r)]) > 1)) warning("Something is seriously wrong with the correlation matrix, some correlations had absolute values > 1! Please check your data.") if(any(is.na(r))) { SMC=FALSE # warning("Missing values in the correlation matrix do not allow for SMC's to be found") bad <- TRUE} if(SMC && is.null(item.smc)) {item.smc <- smc(r)} else { diag(r) <- NA item.smc <- apply(r,1,function(x) max(abs(x),na.rm=TRUE)) item.smc[is.infinite(item.smc) ] <- 1 diag(r) <- 1} if(all(item.smc ==1)) SMC <- FALSE if(!bad) {covar <- t(keys) %*% r %*% keys} else #matrix algebra is our friend {covar<- apply(keys,2,function(x) colSums(apply(keys,2,function(x) colSums(r*x,na.rm=TRUE))*x,na.rm=TRUE)) #matrix multiplication without matrices! } var <- diag(covar) #these are the scale variances n.keys <- ncol(keys) item.var <- item.smc raw.r <- cov2cor(covar) key.var <- diag(t(keys) %*% keys) key.smc <- t(keys) %*% item.smc key.alpha <- ((var-key.var)/var)*(key.var/(key.var-1)) key.lambda6 <- (var - key.var + key.smc)/var key.alpha[is.nan(key.alpha)] <- 1 #if only 1 variable to the cluster, then alpha is undefined key.alpha[!is.finite(key.alpha)] <- 1 key.av.r <- key.alpha/(key.var - key.alpha*(key.var-1)) #alpha 1 = average r colnames(raw.r) <- rownames(raw.r) <- colnames(keys) names(key.lambda6) <- colnames(keys) key.lambda6 <- drop(key.lambda6) n.keys <- ncol(keys) sn <- key.av.r * key.var/(1-key.av.r) if(!bad) { item.cov <- t(keys) %*% r #the normal case is to have all correlations raw.cov <- item.cov %*% keys} else { item.cov <- apply(keys,2,function(x) colSums(r*x,na.rm=TRUE)) #some correlations are NA raw.cov <- apply(keys,2,function(x) colSums(item.cov*x,na.rm=TRUE)) item.cov <- t(item.cov) } adj.cov <- raw.cov #now adjust them med.r <- rep(NA, n.keys) for (i in 1:(n.keys)) { temp <- keys[,i][abs(keys[,i]) > 0] temp <- diag(temp,nrow=length(temp)) small.r <- r[abs(keys[,i])>0,abs(keys[,i])>0] small.r <- temp %*% small.r %*% temp med.r[i] <- median(small.r[lower.tri(small.r)],na.rm=TRUE) for (j in 1:i) { if(av.r) { adj.cov[i,j] <- adj.cov[j,i]<- raw.cov[i,j] - sum(keys[,i] * keys[,j] ) + sum(keys[,i] * keys[,j] * sqrt(key.av.r[i] * key.av.r[j])) } else { adj.cov[i,j] <- adj.cov[j,i] <- raw.cov[i,j] - sum(keys[,i] * keys[,j] )+ sum( keys[,i] * keys[,j] * sqrt(item.smc[i]* abs(keys[,i])*item.smc[j]*abs(keys[,j]) )) } } } scale.var <- diag(raw.cov) diag(adj.cov) <- diag(raw.cov) adj.r <- cov2cor(adj.cov) #this is the overlap adjusted correlations #adjust the item.cov for item overlap #we do this by replacing the diagonal of the r matrix with the item.var (probably an smc, perhaps a maximum value) diag(r) <- item.var if(!bad) { item.cov <- t(keys) %*% r #the normal case is to have all correlations } else { item.cov <- t(apply(keys,2,function(x) colSums(r*x,na.rm=TRUE))) #some correlations are NA } if(n.keys > 1) { item.cor <- sqrt(diag(1/(key.lambda6*scale.var))) %*% (item.cov) # %*% diag(1/sqrt(item.var)) rownames(item.cor) <- colnames(keys) } else { item.cor <- r %*% keys /sqrt(key.lambda6*scale.var) } colnames(item.cor) <- colnames(r) item.cor <- t(item.cor) names(med.r) <- colnames(keys) if (correct) {cluster.corrected <- correct.cor(adj.r,t(key.alpha)) adj.r.vect <- as.vector(adj.r[lower.tri(adj.r)]) r.list [[group]] <- adj.r.vect alpha.list[[group]] <- list(alpha=key.alpha) cor.list[[group]] <- list(cor=adj.r) # names(result[group]) <- names(stats$r[group]) } #correct for attenuation else { result[[group]] <- list(cor=adj.r,sd=sqrt(var),alpha=key.alpha,av.r = key.av.r,size=key.var,sn=sn,G6 =key.lambda6,item.cor=item.cor,med.r=med.r,Call=cl)} } #end of group loop names(alpha.list) <- names(cor.list) <- names(stats$r) r.list <- r.list[!is.na(r.list)] #this gets rid of the missing cases cor.mat <- matrix(unlist(r.list),ncol=length(adj.r.vect),byrow=TRUE) # rownames(cor.mat) <- names(cor.list[!is.na(cor.list)]) cnR <- abbreviate(colnames(keys),minlength=5) k <- 1 nvar <- NCOL(keys) temp.name <- rep(NA,nvar*(nvar-1)/2) #strange, but seems necessary for(i in 1:(nvar-1)) {for (j in (i+1):nvar) { temp.name[k] <- paste(cnR[i],cnR[j],sep="-") # colnames(cor.mat)[k] <- paste(cnR[i],cnR[j],sep="-") k<- k +1 }} colnames(cor.mat)<- temp.name result <- list(alpha=alpha.list,cor = cor.list,cor.mat=cor.mat) class(result) <- c ("psych", "scoreBy") return(result)} #modified 01/11/15 to find r if not a square matrix #modifed 03/05/15 to do pseudo matrix multiplication in case of missing data select.from.list <- function(x){ ngroups <- length(x) result <-list() for (i in 1:ngroups) { result[[i]] <- x[[i]]$cor[lower.tri(x[[i]]$cor)]} return(result) } #use the print.psych function #modified March 22, 2009 to add G6 and corrected for item overlap correlation #also allow for correlation/covariance matrix input #modified Sept 3, 2010 to include response frequencies #modified November 11, 2010 to allow for data with lots of missingness to be scored without imputing means or medians #need to rethink the short option. Why bother since summary and print don't show scores anyway #added missing score to count missing responses for each scale instead of just the overall. #Modified November 22, 2013 to add confidence intervals for alpha #modified Sept 9, 2016 to add the keys.list option for the scoring keys #modified April 22, 2018 to include median within scale correlation #Modified August 25, 2020 to calculate median correlation in the case of variables with no variance "selectFromKeyslist" <- function(itemname,keys) {nkey <- length(keys) select <- NULL for (key in 1:nkey) { if(is.null(keys[[key]])) {select <- NULL} else { if(is.numeric(keys[[key]])) {select <- c(select,itemname[abs(unlist(keys[[key]]))]) } else {select <- c(select,sub("-", "", unlist(keys[[key]]))) } }} return(select)} psych/R/cortest.jennrich.R0000644000176200001440000000224011126167026015206 0ustar liggesusers"cortest.jennrich" <- function(R1,R2,n1=NULL, n2=NULL) { p <- dim(R1)[2] if(dim(R1)[1] != p) { n1 <- dim(R1)[1] R1 <- cor(R1,use="pairwise") warning ("R1 matrix was not square, correlations found") } if(dim(R2)[1] != dim(R2)[2] ) {n2 <- dim(R2)[1] R2 <- cor(R2,use="pairwise") warning ("R2 matrix was not square, correlations found") } if(!is.matrix(R1) ) R1 <- as.matrix(R1) #converts data.frames to matrices if needed if(!is.matrix(R2) ) R2 <- as.matrix(R2) if (dim(R1)[2] != dim(R2)[2]) stop("correlation matrices M and S must be of the same size!") if(is.null(n2)) n2 <- n1 if(!is.null(n1) & !is.null(n2)) c <- n1*n2/(n1+n2) else c <- 1 R <- (n1*R1+n2*R2)/(n1+n2) #matrix of average values S <- R * R #squared values of averaged correlations S.inv <- solve(S) R.inv <- solve(R) R.diff <- R1 - R2 Z <- sqrt(c) * R.inv %*% R.diff chi2 <- tr(Z%*%t(Z))/2 - t(diag(Z)) %*% S.inv %*% diag(Z) chi2 <- chi2[1,1] p <- dim(R1)[1] df <- p*(p-1)/2 results <- list(chi2 =chi2,prob =pchisq(chi2,df,lower.tail=FALSE)) return(results) } #Jennrich (1970) p 908 psych/R/tetrachor.R0000644000176200001440000004145214362030366013730 0ustar liggesusers#adapted from John Fox's Polychor #this does all the work #the following two functions are called repeatedly by tetrac and are put here to speed up the process # "tetraBinBvn.old" <- # function (rho,rc,cc) #adapted from John Fox's polychor # { row.cuts <- c(-Inf, rc, Inf) # col.cuts <- c(-Inf, cc, Inf) # P <- matrix(0, 2,2) # R <- matrix(c(1, rho, rho, 1), 2, 2) # for (i in 1:2) { # for (j in 1:2) { # P[i, j] <- pmvnorm(lower = c(row.cuts[i], col.cuts[j]), # upper = c(row.cuts[i + 1], col.cuts[j + 1]), # corr = R) # }} # P #the estimated 2 x 2 predicted by rho, rc, cc # } #modified 5/8/14 to be consistent with call from tetraF #no change in functionality, just more esthetic #changed 10/16/14 to use sadmvn instead of mvtnorm "tetraBinBvn" <- function (rho,rc,cc) #adapted from John Fox's polychor { row.cuts <- c(-Inf, rc, Inf) col.cuts <- c(-Inf, cc, Inf) P <- matrix(0, 2,2) R <- matrix(c(1, rho, rho, 1), 2, 2) P[1,1] <- sadmvn(lower = c(row.cuts[1], col.cuts[1]), upper = c(row.cuts[2], col.cuts[2]), mean=c(0,0), varcov = R) P[2,1] <- pnorm(rc) - P[1,1] P[1,2] <- pnorm(cc) - P[1,1] P[2,2] <- 1- pnorm(rc) - P[1,2] P #the estimated 2 x 2 predicted by rho, rc, cc } "tetraF" <- function(rho,cc,rc,tab) { P <- tetraBinBvn(rho, rc, cc) -sum(tab * log(P)) } #the ML criterion to be minimized "tetrac" <- function(x,y=NULL,taux,tauy,i,j,correct=.5,global=TRUE,weight=NULL) { if(is.null(y)) {tab <- x} else { if(is.null(weight)) {tab <- tableVeryFast(x,y) } else {tab <- wtd.table(x,y,weight)} #switched to tableF for speed } #changed 9/8/14 #if((length(tab) < 4) | (is.na(sum(tab)) | ((tab[1,1] + tab[1,2]) < 1) | ((tab[2,1] + tab[2,2]) < 1) | ((tab[1,1] + tab[2,1]) < 1) | ((tab[2,1] + tab[2,2]) < 1))) {warning("For i = ", i," j = ",j, " No variance for either i or j rho set to NA") if((length(tab) < 4) | (is.na(sum(tab)) )) {warning("For i = ", i," j = ",j, " No variance for either i or j rho set to NA") result <- list(rho=NA,tau=c(NA,NA),objective=NA) } else { if((sum(tab) > 1) && (min(tab) == 0) && (correct > 0)) { message("For i = ", i," j = ",j, " A cell entry of 0 was replaced with correct = ", correct, ". Check your data!") tab[tab==0] <- correct #correction for continuity } ###### put in the weights here if(sum(tab)>0) { if(global) {cc <- taux rc <- tauy } else { tot <- sum(tab) tab <- tab/tot rc <- qnorm(colSums(tab))[1] cc <- qnorm(rowSums(tab))[1] } rho <- optimize(tetraF,interval=c(-1,1),rc=rc,cc=cc,tab=tab) result <- list(rho=rho$minimum,tau=c(cc,rc),objective=rho$objective) } else { result <- list(rho=NA,tau=c(NA,NA),objectiv=NA)} } return(result) } "wtd.table" <- function(x,y,weight) { tab <- tapply(weight,list(x,y),sum,na.rm=TRUE,simplify=TRUE) #taken from questionr:wtd.table tab[is.na(tab)] <- 0 return(tab) } #repeatedly do the analysis to form a matrix of output #added the pmin instead of min on Sept 10, 2013 "tetra.mat" <- function(x,y=NULL,correct=.5,smooth=TRUE,global=TRUE,weight=NULL) { #the functions to do parallelism myfun <- function(x,i,j) {if(t(!is.na(x[,i]))%*% (!is.na(x[,j])) > 2 ) { tetra <- tetrac(x[,i],x[,j],tau[i],tau[j],i,j,correct=correct,global=global,weight=weight)} else { tetra <- list(rho=NA,tau=c(NA,NA),objective=NA)}} matpLower <- function(x,nvar) { k <- 1 il <- vector() jl <- vector() for(i in 2:nvar) {for (j in 1:(i-1)) { il[k] <- i jl [k] <- j k<- k+1} } #for debugging, turn off mcmapply tet <- mcmapply(function(i,j) myfun(x,i,j) , il,jl) #tet <- mapply(function(i,j) myfun(x,i,j) , il,jl) #for debugging, we do not do parallel cores #now make it a matrix mat <- diag(nvar) mat[upper.tri(mat)] <- as.numeric(tet[1,]) #first row of poly is correlation, 2nd the fit mat <- t(mat) + mat diag(mat) <- 1 return(mat) } nvar <- dim(x)[2] mx <- apply(x,2,function(x) min(x,na.rm=TRUE)) x <- t(t(x) - mx) #x <- x -min(x,na.rm=TRUE) #in case the numbers are not 0,1 -- using pmin allows different minima for different variables n.obs <- dim(x)[1] if(is.null(y)) { if(max(x,na.rm=TRUE) > 1) {stop("Tetrachoric correlations require dictomous data")} if(is.null(weight)) {tau <- -qnorm(colMeans(x,na.rm=TRUE))} else {tau <- -qnorm(apply(x,2,function(y) weighted.mean(y,weight,na.rm=TRUE)))} #weighted tau mat <- matrix(0,nvar,nvar) colnames(mat) <- colnames(y) rownames(mat) <- colnames(x) names(tau) <- colnames(x) #cat("\nFinding the tetrachoric correlations\n") #for (i in 2:nvar) { #progressBar(i^2/2,nvar^2/2,"Tetrachoric") # for (j in 1:(i-1)) { # if(t(!is.na(x[,i]))%*% (!is.na(x[,j])) > 2 ) { # tetra <- tetrac(x[,i],x[,j],tau[i],tau[j],i,j,correct=correct,global=global,weight=weight) # mat[i,j] <- mat[j,i] <- tetra$rho} else {mat[i,j] <- mat[j,i] <- NA} # } # } # diag(mat) <- 1 mat <- matpLower(x,nvar) if(any(is.na(mat))) {warning("some correlations are missing, smoothing turned off") smooth <- FALSE} if(smooth) {mat <- cor.smooth(mat) } #makes it positive semidefinite colnames(mat) <- rownames(mat) <- colnames(x) result <- list(rho = mat,tau = tau,n.obs=n.obs) } else { # the case of having a y variable my <- apply(y,2,function(x) min(x,na.rm=TRUE)) #apply to y Dec 24, 2019 y <- t(t(y) - my) #y <- y -min(y,na.rm=TRUE) #in case the numbers are not 0,1 if(is.matrix(y)) {ny <- dim(y)[2] tauy <- -qnorm(colMeans(y,na.rm=TRUE)) n.obs.y <- dim(y)[1] } else { ny <- 1 n.obs.y <- length(y) tauy <- -qnorm(mean(y,na.rm=TRUE)) } y <- as.matrix(y) if(dim(x)[1] != n.obs.y) {stop("x and y must have the same number of observations")} taux <- -qnorm(colMeans(x,na.rm=TRUE)) nx <- dim(x)[2] mat <- matrix(0,nx,ny) colnames(mat) <- colnames(y) rownames(mat) <- colnames(x) for (i in 1:nx) { for (j in 1:ny) {tetra <- tetrac(x[,i],y[,j],taux[i],tauy[j],correct=correct) mat[i,j] <- tetra$rho } } colnames(mat) <- colnames(y) rownames(mat) <- colnames(x) mat <- t(mat) result <- list(rho = mat,tau = taux,tauy= tauy,n.obs=n.obs) } flush(stdout()) cat("\n" ) #put in to clear the progress bar return(result) } #convert comorbidity type numbers to a table pqr <- function(q1,q2=NULL,p=NULL) { if(length(q1) > 1) { q2 <- q1[2] p <- q1[3] q1 <- q1[1]} tab <- matrix(0,2,2) tab[1,1] <- p tab[2,1] <- q1-p tab[1,2] <- q2-p tab[2,2] <- 1-q1 - tab[1,2] return(tab)} #repeatedly do the analysis to form a matrix of output #added the pmin instead of min on Sept 10, 2013 "tetra.mat.sc" <- function(x,y=NULL,correct=.5,smooth=TRUE,global=TRUE,weight=NULL) { nvar <- dim(x)[2] mx <- apply(x,2,function(x) min(x,na.rm=TRUE)) x <- t(t(x) - mx) #x <- x -min(x,na.rm=TRUE) #in case the numbers are not 0,1 -- using pmin allows different minima for different variables n.obs <- dim(x)[1] if(is.null(y)) { if(max(x,na.rm=TRUE) > 1) {stop("Tetrachoric correlations require dictomous data")} if(is.null(weight)) {tau <- -qnorm(colMeans(x,na.rm=TRUE))} else {tau <- -qnorm(apply(x,2,function(y) weighted.mean(y,weight,na.rm=TRUE)))} #weighted tau mat <- matrix(0,nvar,nvar) names(tau) <- colnames(x) #cat("\nFinding the tetrachoric correlations\n") for (i in 2:nvar) { progressBar(i^2/2,nvar^2/2,"Tetrachoric") for (j in 1:(i-1)) { if(t(!is.na(x[,i]))%*% (!is.na(x[,j])) > 2 ) { tetra <- tetrac(x[,i],x[,j],tau[i],tau[j],i,j,correct=correct,global=global,weight=weight) mat[i,j] <- mat[j,i] <- tetra$rho} else {mat[i,j] <- mat[j,i] <- NA} } } diag(mat) <- 1 if(any(is.na(mat))) {warning("some correlations are missing, smoothing turned off") smooth <- FALSE} if(smooth) {mat <- cor.smooth(mat) } #makes it positive semidefinite #colnames(mat) <- rownames(mat) <- colnames(x) #probably alreday know these result <- list(rho = mat,tau = tau,n.obs=n.obs) } else { # the case of having a y variable my <- apply(x,2,function(x) min(x,na.rm=TRUE)) y <- t(t(y) - my) #y <- y -min(y,na.rm=TRUE) #in case the numbers are not 0,1 if(is.matrix(y)) {ny <- dim(y)[2] tauy <- -qnorm(colMeans(y,na.rm=TRUE)) n.obs.y <- dim(y)[1] } else { ny <- 1 n.obs.y <- length(y)} tauy <- -qnorm(mean(y,na.rm=TRUE)) y <- as.matrix(y) if(dim(x)[1] != n.obs.y) {stop("x and y must have the same number of observations")} taux <- -qnorm(colMeans(x,na.rm=TRUE)) nx <- dim(x)[2] mat <- matrix(0,nx,ny) colnames(mat) <- colnames(y) rownames(mat) <- colnames(x) for (i in 1:nx) { for (j in 1:ny) {tetra <- tetrac(x[,i],y[,j],taux[i],tauy[j],correct=correct) mat[i,j] <- tetra$rho } } result <- list(rho = mat,tau = taux,tauy= tauy,n.obs=n.obs) } flush(stdout()) cat("\n" ) #put in to clear the progress bar return(result) } #convert comorbidity type numbers to a table pqr <- function(q1,q2=NULL,p=NULL) { if(length(q1) > 1) { q2 <- q1[2] p <- q1[3] q1 <- q1[1]} tab <- matrix(0,2,2) tab[1,1] <- p tab[2,1] <- q1-p tab[1,2] <- q2-p tab[2,2] <- 1 - q1 - q2 + p return(tab)} #the public function "tetrachoric" <- function(x,y=NULL,correct=.5,smooth=TRUE,global=TRUE,weight=NULL,na.rm=TRUE,delete=TRUE) { # if(!require(mnormt)) {stop("I am sorry, you must have mnormt installed to use tetrachoric")} cl <- match.call() if (!is.matrix(x) && !is.data.frame(x)) { if (length(x) ==4) {x <- matrix(x,2,2) } else { if(length(x) ==3 ) {x <- pqr(x) } else { stop("Data must be either a 1 x 4 vector, a 2 x 2 matrix, a comorbidity table, or a data.frame/matrix of data")} }} nvar <- dim(x)[2] n.obs <- dim(x)[1] # if(!is.numeric(x)) {x <- matrix(as.numeric(x),ncol=nvar) # message("Converted non-numeric input to numeric")} if(!is.null(weight)) {if (length(weight)!= n.obs) stop("The number of weights must match the number of observations") } if (n.obs == nvar) {result <- tetrac(x,correct=correct,i=1,j=1,global=FALSE)} else { #first delete any bad cases item.var <- apply(x,2,sd,na.rm=na.rm) bad <- which((item.var <= 0)|is.na(item.var)) if((length(bad) > 0) & delete) { for (baddy in 1:length(bad)) {warning( "Item = ",colnames(x)[bad][baddy], " had no variance and was deleted")} x <- x[,-bad] nvar <- nvar - length(bad) } # parallel is now built into the system, so we don't need this. # if(!require(parallel)) {warning("need parallel installed to take advantage of multiple cores. Using single core version instead") # result <- tetra.mat.sc(x,y=y,correct=correct,smooth=smooth,global=global,weight=weight)} else { result <- tetra.mat(x,y=y,correct=correct,smooth=smooth,global=global,weight=weight)} result$Call <- cl class(result) <- c("psych","tetra") return(result) } #modified 1/14/14 to include the tableF function to double the speed for large problems #modified 12/25/19 to use tableVeryFast to be even be faster () "tetrachor" <- function(x,correct=.5) { #if(!require(mnormt)) {stop("I am sorry, you must have mnormt installed to use tetrachor")} cl <- match.call() if (!is.matrix(x) && !is.data.frame(x)) { if (length(x) ==4) {x <- matrix(x,2,2) } else { if(length(x) ==3 ) {x <- pqr(x) } else { stop("Data must be either a 1 x 4 vector, a 2 x 2 matrix, a comorbidity table, or a data.frame/matrix of data")} }} nvar <- dim(x)[2] if (dim(x)[1] == nvar) {result <- tetrac(x,correct=correct)} else { result <- tetra.mat(x,correct=correct)} result$Call <- cl class(result) <- c("psych","tetra") return(result) } #does the work "biserialc" <- function(x,y,i,j) { cc <- complete.cases(x,y) x <- x[cc] y <- y[cc] yf <- as.factor(y) lev <- levels(yf) if(length(lev)!=2) {#stop("y is not a dichotomous variable") warning("For x = ",i, " y = ", j, " y is not dichotomous") r <- NA} else { ty <- table(y) tot <- sum(ty) tab <- ty/tot if(length(tab) < 2) {r <- NA warning("For x = ",i, " y = ", j, " no variance for y r set to NA")} else { #this treats the case of no variance in the dichotmous variable zp <- dnorm(qnorm(tab[2])) hi <- mean(x[y==lev[2]],na.rm=TRUE) lo <- mean(x[y==lev[1]],na.ram=TRUE) # r <- (hi - lo)*sqrt(prod(tab))/(sd(x,na.rm=TRUE)) #point biserial r <- (hi - lo)*(prod(tab))/(zp * sd(x,na.rm=TRUE)) if(!is.na(r) && abs(r) >1 ) { if (r > 1) {r <- 1 } else {r <- -1} #in case we are correlating a dichotomous variable with itself warning("For x = ",i, " y = ", j, " x seems to be dichotomous, not continuous") }}} return(r) } "biserial" <- function(x,y) { x <- as.matrix(x,drop=FALSE) y <- as.matrix(y,drop=FALSE) nx <- dim(x)[2] ny <- dim(y)[2] if(is.null(nx)) nx <- 1 if(is.null(ny)) ny <- 1 mat <- matrix(NaN,nrow=ny,ncol=nx) colnames(mat) <- colnames(x) rownames(mat) <- colnames(y) #cat("\n Finding the biserial correlations\n") for(i in 1:ny) { progressBar(i*(i-1)/2,ny^2/2,"Biserial") for (j in 1:nx) { mat[i,j] <- biserialc(x[,j],y[,i],j,i) }} flush(stdout()) cat("\n" ) #put in to clear the progress bar return(mat) } "polyserial" <- function(x,y) { # y <- matrix(y) min.item <- min(y, na.rm = TRUE) max.item <- max(y, na.rm = TRUE) if(is.null(ncol(y))) {n.var <- 1 n.cases <- length(y) } else {n.var <- ncol(y) n.cases <- dim(y)[1]} dummy <- matrix(rep(min.item:max.item, n.var), ncol = n.var) colnames(dummy) <- names(y) xdum <- rbind(y, dummy) frequency <- apply(xdum, 2, table) frequency <- t(frequency - 1) responses <- rowSums(frequency) frequency <- frequency/responses frequency <- t(apply(frequency,1,cumsum)) len <- dim(frequency)[2] tau <- dnorm(qnorm(frequency[,-len,drop=FALSE])) stau <- rowSums(tau) rxy <- cor(x,y,use="pairwise") sdy <- apply(y,2,sd,na.rm=TRUE) rps <- t(rxy) * sqrt((n.cases-1)/n.cases) * sdy/stau rps[rps > 1.0] <- 1.0 rps[rps < -1.0] <- -1.0 return(rps) } #modified November 28, 2014 to be slightly more aggressive about smoothing #modified January 18l, 2023 to smooth covariances as well #this is more similar to cov2cor(nearPD$mat) "cor.smooth" <- function(x,eig.tol=10^-12) { if(!isCorrelation(x) & !isCovariance(x)) {x <- cor(x,use="pairwise") message("Pearson correlations of the raw data were found")} eigens <- try(eigen(x),TRUE) if(inherits(eigens, as.character("try-error"))) {warning('I am sorry, there is something seriously wrong with the correlation matrix,\ncor.smooth failed to smooth it because some of the eigen values are NA. \nAre you sure you specified the data correctly?') } else { if(min(eigens$values) < .Machine$double.eps) {warning("Matrix was not positive definite, smoothing was done") #eigens$values[eigens$values < .Machine$double.eps] <- 100 * .Machine$double.eps eigens$values[eigens$values < eig.tol] <- 100 * eig.tol nvar <- dim(x)[1] tot <- sum(eigens$values) eigens$values <- eigens$values * nvar/tot cnames <- colnames(x) rnames <- rownames(x) x <- eigens$vectors %*% diag(eigens$values) %*% t(eigens$vectors) x <- cov2cor(x) colnames(x) <- cnames rownames(x) <- rnames} } return(x)} #modified January 9, 2012 to add the try so we don't fail (just complain) if the data are bad. #identify the most likely candidates for a bad item "cor.smoother" <- function(x,cut=.01) { nvar <- ncol(x) result <- list() if(nrow(x) != nvar) x <- cor(x,use="pairwise") bad <- rep(NA,nvar) good <- rep(TRUE,nvar) names(good) <- names(bad) <- colnames(x) for (i in 1:nvar) { ev <- eigen(x[-i,-i])$values if(any(ev < 0) ) {bad[i] <- TRUE good[i] <- FALSE} bad[i] <- sum((ev < 0),na.rm=TRUE) } if(sum(bad+0) > 0 ) {result$good <- bad[(bad > 0)] result$bad <- good[good] s <- cor.smooth(x) possible <- arrayInd(which.max(abs(s-x)),dim(x),.dimnames=colnames(x)) result$likely <- colnames(x)[possible] result$possible <- arrayInd(which(abs(s-x) > cut),dim(x),.dimnames=colnames(x)) result$possible <- sort(table(colnames(x)[result$possible]),decreasing=TRUE) } else {result$bad <- c("all ok")} class(result) <- c("psych","smoother") return(result) } tableVeryFast <- function(x,y){ #just takes 0,1 data #maxxy <- 4 #(maxx+(minx==0))*(maxy+(minx==0)) bin <- x + y*2+ 1 dims=c(2 ,2) ans <- matrix(tabulate(bin,4),dims) ans } psych/R/factor.fit.R0000644000176200001440000000025211172270216013761 0ustar liggesusers"factor.fit" <- function (r,f) { r2 <-sum( r*r) rstar <- factor.residuals(r,f) rstar2 <- sum(rstar*rstar) fit<- 1- rstar2/r2 return(fit) } psych/R/VSS.plot.R0000644000176200001440000000112611043363746013363 0ustar liggesusers"VSS.plot" <- function(x,title="Very Simple Structure",line=FALSE) { op <- par(no.readonly = TRUE) # the whole list of settable par's. n=dim(x) symb=c(49,50,51,52) #plotting sym plot(x$cfit.1,ylim=c(0,1),type="b",ylab="Very Simple Structure Fit",xlab="Number of Factors",pch=49) if (line) lines(x$fit) title(main=title) x$cfit.2[1]<-NA x$cfit.3[1]<-NA x$cfit.3[2]<-NA x$cfit.4[1]<-NA x$cfit.4[2]<-NA x$cfit.4[3]<-NA lines(x$cfit.2) points(x$cfit.2,pch=50) lines(x$cfit.3) points(x$cfit.3,pch=symb[3]) lines(x$cfit.4) points(x$cfit.4,pch=symb[4]) par(op) } psych/R/fisherz.R0000644000176200001440000000126113356773242013412 0ustar liggesusers"fisherz" <- function(rho) {0.5*log((1+rho)/(1-rho)) } #converts r to z "fisherz2r" <- function(z) {(exp(2*z)-1)/(1+exp(2*z)) } #converts back again "r2d" <- function(rho) {2*rho/sqrt(1-rho^2)} "d2r" <- function(d) {d/sqrt(d^2+4)} #added sign correction October 8, 2018 "t2r" <- function(t,df) {sign(t) * sqrt(t^2/(t^2 + df))} #fixed April 27, 2017 "g2r" <- function(g,df,n) {sign(g) * g/sqrt(g^2 + 4*df/n)} "chi2r" <- function(chi2,n) {sqrt(chi2/n)} "r2chi" <- function(rho,n) { chi2 <-( rho^2 *n)} "cor2cov" <- "r2c" <- function(rho,sigma) { sigma <- diag(sigma) cov <- sigma %*% rho %*% sigma colnames(cov) <- rownames(cov) <- colnames(rho) return(cov)} psych/R/bassAckward.diagram.r0000644000176200001440000001426014110736327015623 0ustar liggesusers#modified 6/620 to increase drastically increase speed by taking the drawing out of a loop #fixed a problem of plotting in wrong order 8/20/20 #perhaps finally fixed the variables to appear in the right order for both lr and vertical mode 09/12/20 bassAckward.diagram <- function(x,digits=2,cut = .3,labels=NULL,marg=c(1.5,.5,1.0,.5), main="BassAckward",items=TRUE,sort=TRUE,lr=TRUE,curves=FALSE,organize=TRUE,values=FALSE,...) { old.par<- par(mar=marg) #give the window some narrower margins on.exit(par(old.par)) #set them back if(x$fm == "pca") {pca <-TRUE} else {pca <- FALSE} #added 19/08/21 to draw boxes for components if(organize) x <- ba.organize(x) nf = length(x$bass.ack) #this counts how many results are there if(!items) nf <- nf-1 if(sort){ x$bass.ack[[nf]] <- fa.sort(x$bass.ack[[nf]]) x$labels[[nf]] <- rownames(x$bass.ack[[nf]]) } if(lr) {ylim <- c(0,NROW(x$bass.ack[[nf]])) #this is the number of variables xlim <- c(-1.1,(nf-1.9)) } else {xlim <- c(.9,NROW(x$bass.ack[[nf]])) ylim <- c(-1.1,(nf-1.9))} lower <- list() upper <- list() if(is.null(labels)) labels <- x$labels if(values) labels<- add.values(labels,x$fa.vac , lr = lr) if(items) labels[[nf]] <- x$labels[[nf]] #this puts in the bottom row/left hand side (the items) plot(0,type="n",xlim=xlim,ylim=ylim,frame.plot=FALSE,axes=FALSE,ylab="",xlab="",main=main) #first draw the bottom row nvar <- NROW(x$bass.ack[[nf]]) max.var <- nvar rname <- labels[[nf]] if(lr) { all.rects.x <- rep(-1,nvar) all.rects.y <- seq(nvar,1, -1 ) #top to bottom numbering all.rects.rname <- rname[1:nvar] } else { all.rects.y <- rep(-1,nvar) all.rects.x <- seq(1:nvar) #all.rects.rname <- rname[seq(nvar,1,-1)] } all.rects.rname <- rname[seq(1, nvar, 1)] } #first define the various locations but don't draw them for(j in 1:nvar) { if(lr) {lower [[j]] <- dia.rect(-1, nvar - j + 1, rname[ j],draw=FALSE,...) } else {lower [[j]] <- dia.rect(j,-1, rname[j],draw=FALSE,...)} # if(lr) {lower [[j]] <- dia.rect(-1,j , rname[j],draw=FALSE,...) } else {lower [[j]] <- dia.rect(j,-1, rname[j],draw=FALSE,...)} #12/07/20 } #if either the bottom (item) row or pca draw rectangles. otherwise ellipses if(pca | items) {dia.rect(all.rects.x,all.rects.y,all.rects.rname) } else { dia.multi.ellipse(all.rects.x,all.rects.y,all.rects.rname) } #now draw the next row and then repeat until the top for(j in (nf):2) { if((j < nf) & organize) x <- ba.organize(x,j) nvar <- NCOL(x$bass.ack[[j]]) scale <- max.var/(nvar+1) #store the locations if(lr) { all.rects.x <- rep(nf-j,nvar) all.rects.y <- seq(nvar,1,-1) * scale all.rects.rname <- labels[[j-1]] } else { all.rects.y <- rep(nf-j,nvar) all.rects.x <- seq(1:nvar) *scale all.rects.rname <- labels[[j-1]] } for(i in 1:nvar) { #which is actually the number of lower level variables or factors cname <- labels[[j-1]] # if(lr) {upper[[i]] <- dia.rect(nf-j,(nvar-i + 1) *scale, labels= cname[i],draw=FALSE,...)} else { upper[[i]] <- dia.rect(i*scale,nf-j, labels= cname[i],draw=FALSE,...) } if(lr) {upper[[i]] <- dia.ellipse(nf-j,(nvar-i + 1) *scale, labels= cname[i],draw=FALSE,e.size=1,...)} else { upper[[i]] <- dia.ellipse(i*scale,nf-j, labels= cname[i],draw=FALSE,e.size=1,...) } } if(pca) {dia.rect(all.rects.x,all.rects.y,all.rects.rname) } else { #this makes all the ellipses the same size, but we need to adjust the limits to match the ellipses dia.multi.ellipse(all.rects.x,all.rects.y,all.rects.rname) } #connect them and then put in the correlation values text.values <- list() #save the text values from the arrows ki <- 1 #set the counter to 1 for(i in 1:nvar) {#do it for every top factor if(length(x$Phi)>0) {Phi <- x$Phi[[j-1]]} else {Phi <- NULL} nfact <- NROW(x$bass.ack[[j]]) if(!is.null(Phi) && (ncol(Phi) >1) && curves) { if(i < nvar) {for(k in ((i+1):(nvar))) { if(abs(Phi[i,k]) > cut) { if(lr){dia.curve(from=upper[[i]]$right,to=upper[[k]]$right,labels=round(Phi[i,k],digits),scale = .2 , ...) } else {dia.curve(from=upper[[i]]$top,to=upper[[k]]$top,labels=round(Phi[i,k],digits),scale = .2 , ...)} } }} } for(k in 1:nfact) { if(abs(x$bass.ack[[j]][k,i]) > cut ) { #just draw the large loadings value <- x$bass.ack[[j]][k,i] if(lr) { text.values[[ki]] <- dia.arrow(upper[[i ]]$left,lower[[k]]$right,adj=((i-k) %% 3) ,labels = round(value,digits), col=(sign(value <0) +1),lty=(sign(value<0)+1),draw=FALSE,...) } else { text.values[[ki]] <- dia.arrow(upper[[i]]$bottom,lower[[k]]$top,adj=((i-k) %% 3) ,labels = round(value,digits), col=(sign(value <0) +1),lty=(sign(value<0)+1),draw=FALSE,...)} ki <- ki +1 } } } tv <- matrix(unlist(text.values),byrow=TRUE,ncol=21) text(tv[,1],tv[,2],tv[,3]) # ,tv[,5]) #don't use the adj parameter arrows(x0=tv[,6],y0=tv[,7],x1=tv[,8],y1=tv[,9],length=tv[1,10],angle=tv[1,11],code=1,col=tv[,20],lty=tv[,21]) arrows(x0=tv[,13],y0=tv[,14],x1=tv[,15],y1=tv[,16],length=tv[1,17],angle=tv[1,18],code=2,col=tv[,20],lty=tv[,21]) lower <- upper #go on to the next level upper <- list() #zeros it out } invisible(x) } #organize the lowest two levels to get somewhat cleaner structures ba.organize <- function(x,level=NULL){ if(is.null(level)) {nf = length(x$bass.ack) #this counts how many results are there level0 <- fa.sort(x$bass.ack[[nf]]) x$labels[[nf]] <- rownames(level0) fa <- x$fa$loadings[[nf-1] ] #added as fa$loadings to match change in bassAckward fa <- fa[x$labels[[nf]],] x$fa[[nf-1] ] <- fa level1 <- fa.sort(x$bass.ack[[nf-1]]) ord1 <- rownames(level1) level0 <- level0[,ord1,drop=FALSE] colnames(level0) <- paste0("F",1:NCOL(level0)) x$bass.ack[[nf]] <- level0 x$bass.ack[[nf-1]] <- level1 } else {nf <- level #just organize the factors, not the items } return(x) } add.values <- function(labels=NULL,values=NULL,lr=FALSE) { nlab <- length(labels) if(length(values) < nlab) nlab=length(values) for (i in 1:nlab) { attributes(values[[i]]) <- NULL if(lr) {labels[[i]] <- paste(labels[[i]]," ",round(values[[i]],2))} else {labels[[i]] <- paste(labels[[i]],"\n",round(values[[i]],2))} } return(labels) } psych/R/cohen.d.R0000644000176200001440000002652414444576566013300 0ustar liggesusers"cohen.d" <- function(x,group,alpha=.05,std=TRUE,sort=NULL,dictionary=NULL,MD=TRUE,data=NULL) { #added the hedges option 2/22/22 #corrected the nmax calculation following a request/commment by cl <- match.call() group2 <- NULL if(inherits(x,"formula")) {ps <- fparse(x) #group was specified, call describeBy if(is.null(data)) {x <- try(get(ps$y)) if(is.null(dim(x) )) stop("You need to specify the data if asking for a specific variable")} else {select <- c(ps$y,ps$x) #check for bad input -- the Mollycoddle option if(any( !(select %in% colnames(data)) )) { cat("\nVariable names are incorrect. Offending items are ", select[which(!(c(ps$y,ps$x) %in% colnames(data)))],"\n") stop("Improper input. See above. ")} x <- data[,select] #add the group variable to x } #added tha ability to choose y variables if data is specified 01/17/21 group <- ps$x if(length(group ) > 1) {group2 <- group[2] group <- group[1]} } if(!is.null(group2)){cohen.d.by(x =x, group=group,group2=group2)} else { #x <- char2numeric(x) # just in case there are character values 12/12/20 -- removed 7/7/21 following problem reported by Frank Poppenmeier if ((length(group) ==1) && ( group %in% colnames(x) )) {group <- which(colnames(x) %in% group) group.in <- TRUE} else {group.in <- FALSE} stats <- statsBy(x,group) S <- stats$rwg if(MD) S.inv <- Pinv(S) #the pseudo inverse because it is possible this is not PSD added December 15, 2019, added as if statement 7/7/21 d <- stats$mean[2,] - stats$mean[1,] #this is a vector differerences sd.p <- sqrt((( (stats$n[1,]-1) * stats$sd[1,]^2) + (stats$n[2,]-1) * stats$sd[2,]^2)/(stats$n[1,]+stats$n[2,])) #if we subtract 2 from n, we get Hedges g sd.ph <- sqrt((((stats$n[1,]-1) * stats$sd[1,]^2) + (stats$n[2,]-1) * stats$sd[2,]^2)/(stats$n[1,]+stats$n[2,]-2)) #if we subtract 2 from n, we get Hedges g n <- stats$n[1,]+ stats$n[2,] #this is a vector of ns taken from the statsBy function cohen.d <- d/sd.p hedges.g <- d/sd.ph d <- cohen.d #basically use this in the Mahalanobis distance names(cohen.d) <- colnames(x) if(!group.in) {group <- which(colnames(stats$n)=="group")} n <- n[-group] #statsBy returns grouping variable as well, ignore it d <- d[-group] n1 <- stats$n[1,-group] n2 <- stats$n[2,-group] p1 <- n1/n #the proportion in group 1 p2 <- n2/n cohen.d <- cohen.d[-group] hedges.g <- hedges.g[-group] r <- cohen.d/sqrt(cohen.d^2 + 1/( p1*p2)) #} else {r <- cohen.d/sqrt(cohen.d^2 + 1/( p1*p2) )} #for unequal n otherwise this is just 4 t <- d2t(hedges.g,n1=n1,n2=n2) #added the n1 and n2 following a report from Emil Kiergagard 7/14/21 also used pooled sd formula (i.e. hedges.g) to find t This matches t.test with var.equal=TRUE p <- ( 1-pt(abs(t),n-2)) * 2 if(MD) {D <- sqrt(t(d) %*% S.inv %*% d) #convert to D units from D2 units wt.d <- t(d) %*% S.inv *d #what is this? D <- as.vector(D)} else {D <- NA wt.d <- NA} hedges.d.conf <- cohen.d.ci(hedges.g,n1=n1,n2=n2,alpha=alpha) cohen.d.conf <- cohen.d.ci(cohen.d,n1=n1,n2=n2,alpha=alpha) if(!is.null(dictionary)) {dict <- dictionary[match(rownames(cohen.d.conf),rownames(dictionary)),,drop=FALSE] cohen.d.conf <- cbind(cohen.d.conf,dict)} else {dict=NULL} if(!is.null(sort)) { ord <- cohen.d.conf[,"effect",drop=FALSE] if(sort %in%( c("decreasing","increasing","TRUE"))) {#first convert the effect column to a vector so we can sort it if(sort == "increasing") {sort= FALSE} else {sort =TRUE} ord <- order(ord,decreasing=sort ) #ord <- order(cohen.d.conf[,"effect",drop=FALSE,decreasing=TRUE)} else {ord <- dfOrder(cohen.d.conf["effect"],ascending=FALSE)} cohen.d.conf <- cohen.d.conf[ord,] dict <- dict[ord,,drop=FALSE] }} else {ord <- 1:NCOL(x)} se <- (cohen.d.conf[,3] - cohen.d.conf[,1])/2 #average upper - lower result <- list(cohen.d = cohen.d.conf,hedges.g = hedges.d.conf, M.dist = D, r=r,t=t,n=n,p=p, wt.d =wt.d,descriptive=stats,se=se,dict=dict,order=ord,Call=cl) class(result) <- c("psych","cohen.d") return(result) } } "d2t" <- function(d,n=NULL,n2=NULL,n1=NULL) {if(is.null(n1)) {t <- d*sqrt(n)/2} else if(is.null(n2)) {t <- d*sqrt(n1) } else { t <- d /sqrt(1/n1 + 1/n2)} return(t)} "t2d" <- function(t,n=NULL,n2=NULL, n1=NULL) {if(is.null(n1)) { d <- 2*t/sqrt(n)} else { if(is.null(n2)) { d <- t/sqrt(n1)} else { d <- t * sqrt(1/n1 + 1/n2)}} return(d)} # "d.ci" <- "cohen.d.ci" <- function(d,n=NULL,n2=NULL,n1=NULL,alpha=.05) { t <- d2t(d=d,n=n,n2=n2,n1=n1) # tail <- 1- alpha/2 # ci <- matrix(NA,ncol=3,nrow=length(d)) # for(i in 1:length(d)) { # nmax <- pmax(c(n/2+1,n1+1,n1+n2)) #divide n/2 added 9/16/21 #changed pmax to max 2222/22 # upper <- try(t2d( uniroot(function(x) {suppressWarnings(pt(q=t[i],df=nmax[i]-2,ncp=x)) - alpha/2}, c(min(-5,-abs(t[i])*10),max(5,abs(t[i])*10)))$root,n=n[i],n2=n2[i],n1=n1[i]),silent=TRUE) # # if(inherits( upper, "try-error")) {ci[i,3] <- NA} else {ci[i,3] <- upper} # ci[i,2] <- d[i] # lower.ci <- try(t2d(uniroot(function(x) {suppressWarnings(pt(q=t[i],df=nmax[i]-2,ncp=x)) - tail}, c(min(-5,-abs(t[i])*10),max(5,abs(t[i]) *10)))$root,n=n[i],n2=n2[i],n1=n1[i]),silent=TRUE) # if(inherits( lower.ci,"try-error")) {ci[i,1] <- NA} else {ci[i,1] <- lower.ci} # } # colnames(ci) <- c("lower","effect","upper") # rownames(ci) <- names(d) # return(ci) # } #note that when called from cohen.d, n is NULL "d.ci" <- "cohen.d.ci" <- function(d,n=NULL,n2=NULL,n1=NULL,alpha=.05) { t <- d2t(d=d,n=n,n2=n2,n1=n1) tail <- 1- alpha/2 ci <- matrix(NA,ncol=3,nrow=length(d)) for(i in 1:length(d)) { nmax <- max(c(n/2+1,n1+1,n1+n2)) #divide n/2 added 9/16/21 #changed pmax to max 2222/22 upper <- try(t2d( uniroot(function(x) {suppressWarnings(pt(q=t[i],df=nmax-2,ncp=x)) - alpha/2}, c(min(-5,-abs(t[i])*10),max(5,abs(t[i])*10)))$root,n=n[i],n2=n2[i],n1=n1[i]),silent=TRUE) if(inherits( upper, "try-error")) {ci[i,3] <- NA} else {ci[i,3] <- upper} ci[i,2] <- d[i] lower.ci <- try(t2d(uniroot(function(x) {suppressWarnings(pt(q=t[i],df=nmax-2,ncp=x)) - tail}, c(min(-5,-abs(t[i])*10),max(5,abs(t[i]) *10)))$root,n=n[i],n2=n2[i],n1=n1[i]),silent=TRUE) if(inherits( lower.ci,"try-error")) {ci[i,1] <- NA} else {ci[i,1] <- lower.ci} } colnames(ci) <- c("lower","effect","upper") rownames(ci) <- names(d) return(ci) } "m2d" <- function(m1,m2,s1,s2,n1=NULL,n2=NULL,n=NULL,pooled=TRUE){ if(pooled) {if(!is.null(n1)) {vp <- ((n1-1) * s1^2 + (n2-1)* s2^2)/(n1+n2 -2) sp <- sqrt(vp)} else { sp <- sqrt((s1^2 + s2^2)/2)}} else {sp <- s1} d <- (m1-m2)/sp return(d) } "m2t" <- function(m1,m2,s1,s2,n1=NULL,n2=NULL,n=NULL,pooled=TRUE ) { if(!is.null(n) ) { t <- (m1-m2)/sqrt((s1^2 + s2^2)/(n/2)) d <- 2*t/sqrt(n) df <- n-2} else { if(pooled) {vp <- ((n1-1) * s1^2 + (n2-1)* s2^2)/(n1+n2 -2 ) se = sqrt(vp*(1/n1 + 1/n2))} else {se = sqrt(s1^2/n1 + s2^2/n2)} t <- (m1-m2)/se df=n1 +n2 -2 if(!pooled) {df = (s1^2/n1 + s2^2/n2)^2/(s1^4/(n1^2 *(n1-1)) + s2^4/(n2^2 * (n2-1)))} d <- t * sqrt(1/n1 + 1/n2)} p <- 2* pt(abs(t),df,lower.tail=FALSE) result <- list(t=t,df=df,p= p,d=d) cat("\n t = ",t, "df =", df, " with probability = ",p,"\n") invisible(result) #return the values as well } "d2OVL" <- function(d) {2*pnorm(-abs(d)/2)} "d2OVL2" <- function(d) {d2OVL(d)/(2-d2OVL(d))} "d2CL" <- function(d) {pnorm(abs(d)/sqrt(2))} "d2U3" <- function(d) {pnorm(abs(d))} "cohen.d.by" <- function(x,group,group2,alpha=.05,MD=TRUE) { if(inherits(x,"formula")) {ps <- fparse(x) #group was specified, call describeBy x <- get(ps$y) group <- ps$x if(length(group ) > 1) {group2 <- group[2] group <- group[1]}} group1 <- group group1name <- group group2name <- group2 group2 <- which(colnames(x) %in% group2) group1 <- which(colnames(x) %in% group) categories <- names(table(x[group2])) result <- list() for(i in 1:length(categories)) { group <- subset(x,x[group2]==categories[i]) group <- group[-group2] result[[i]] <- cohen.d(group,group1name,MD=MD) } names(result) <- paste0(group1name,"for",group2name ,categories) class(result) <- class(result) <- c("psych","cohen.d.by") return(result) } "print_cohen.d" <- function(x,digits=2) {cat("Call: ") print(x$Call) cat("Cohen d statistic of difference between two means\n") if(NCOL(x$cohen.d) == 3) {print(round(x$cohen.d,digits=digits))} else {print( data.frame(round(x$cohen.d[1:3],digits=digits),x$cohen.d[4:NCOL(x$cohen.d)]))} cat("\nMultivariate (Mahalanobis) distance between groups\n") print(x$M.dist,digits=digits) cat("r equivalent of difference between two means\n") print(round(x$r,digits=digits)) } "print_cohen.d.by" <- function(x,digits=2) {cat("Call: ") print(x$Call) ncases <- length(x) for (i in (1:ncases)) {cat("\n Group levels = ",names(x[i]),"\n") cat("Cohen d statistic of difference between two means\n") print(x[[i]]$cohen.d,digits=digits) cat("\nMultivariate (Mahalanobis) distance between groups\n") print(x[[i]]$M.dist,digits=digits) cat("r equivalent dof difference between two means\n") print(x[[i]]$r,digits=digits) } } #Following Algina 2015 "d.robust" <- function(x,group,trim=.2) { valid <- function(x) { sum(!is.na(x)) } nvar <- NCOL(x) means <- list() vars <- list() Sw <- d.robust <- rep(NA,nvar) n.by.grp <- list() if(nvar ==1) { means[1] <- by(x,group,function(x) mean(x,trim = trim, na.rm=TRUE)) vars[1] <- by(x,group,function(x) winsor.var(x,trim=trim,na.rm=TRUE)) } else { cn <- colnames(x) for (i in 1:nvar) { n.by.grp[[cn[i]]] <- by(x[,i],x[group],valid) means[[cn[i]]] <- by(x[,i],x[group],function(x) mean(x,trim = trim, na.rm=TRUE)) vars[[cn[i]]] <- by(x[,i],x[group],function(x) winsor.var(x,trim = trim, na.rm=TRUE)) } } mean.by.grp <- matrix(unlist(means),ncol=2,byrow=TRUE) vars.by.grp <- matrix(unlist(vars),ncol=2,byrow=TRUE) n.by.grp <- matrix(unlist(n.by.grp),ncol=2,byrow=TRUE) rownames(mean.by.grp) <- cn rownames(vars.by.grp) <- cn colnames(mean.by.grp) <-colnames(vars.by.grp) <- paste0("Grp",1:2) for(i in 1:nvar) { Sw[i] = sqrt((vars.by.grp[i,1] * (n.by.grp[i,1]-1) + vars.by.grp[i,2] * (n.by.grp[i,2]-1))/(n.by.grp[i,1] + n.by.grp[i,2]-2)) d.robust[i] <- .642 * (mean.by.grp[i,2] - mean.by.grp[i,1])/Sw[i] names(d.robust) <- cn } grp.loc <- which(row.names(mean.by.grp)==group) result <- list(means=mean.by.grp[-grp.loc,],vars=vars.by.grp[-grp.loc,],Spooled=Sw[-grp.loc],robust.d = d.robust[-grp.loc]) return(result) } #find the resampled M.dist November 3, 2018 cohen.d.expected <- function(x,group,n.rep=10 ) { summary <- list() n.obs <- nrow(x) observed <- cohen.d(x=x,group=group)$M.dist ind <- 1:n.obs for(i in 1:n.rep){ samp <- sample(ind,n.obs,replace=FALSE) #this is a random permutation of the order variable x[,group] <- x[samp,group] summary[[i]] <- cohen.d(x,group)$M.dist } result <- unlist(summary) mean.boot <- mean(result) sd.boot <- sd(result) result <-list(observed=observed,mean = mean.boot,sd=sd.boot,trials =result) return(result) } psych/R/bestItems.r0000644000176200001440000001051614301425520013722 0ustar liggesusers#added just correlate with criteria to speed it up (June 23, 2017) #fixed 7/16/22 to correctly handle the problem of raw data "bestItems" <- function(x,criteria=1,cut=.1, n.item=10, abs=TRUE, dictionary=NULL,check=FALSE,digits=2,use="pairwise",method="pearson") { if(check) {item.var <- apply(x,2,sd,na.rm=TRUE) #added check 10/14/17 bad <- which((item.var <= 0)|is.na(item.var)) if((length(bad) > 0) ) { for (baddy in 1:length(bad)) {message( "Item = ",colnames(x)[bad][baddy], " had no variance and was deleted")} x <- x[,-bad] } } result <- list() best <- list() key <- list() if(isCorrelation(x)) {raw=FALSE external<- FALSE cn.crit <- criteria} else {data <- x #find the correlations if(NROW(criteria)==NROW(data)) {cn.crit <- colnames(criteria) data <- cbind(data,criteria) external <- TRUE criteria <- cn.crit} else {cn.crit <- criteria external <- FALSE} x <- cor(data,data[,criteria,drop=FALSE],use=use,method=method) } if(external) {x[criteria,criteria] <- 0 } #next part removed 7/17/22 for(i in 1:length(criteria)) {criterion <- criteria[i] # if(raw) { x <- cor(data,data[,criterion,drop=FALSE],use="pairwise") # if(NROW(criterion)> 1) {x <- cbind(x,criterion) #to allow for a separate object # criteron <- "criteria" } # colnames(x)<- criterion # } #the normal case --convert to correlation if necessary x[criterion,criterion] <- 0 if(abs) {ord <- order(abs(x[,criterion,drop=FALSE]),decreasing=TRUE) value <- x[ord,criterion,drop=FALSE] count <- sum(abs(value) > cut,na.rm=TRUE) if(!is.null(n.item)) count <- min(count,n.item) value <- value[1:count,,drop=FALSE] } else {ord <- order(x[,criterion,drop=FALSE],decreasing=TRUE) value <- x[ord,criterion,drop=FALSE] value <- value[value,criterion > cut,drop=FALSE] } value <- round(data.frame(value),digits) colnames(value) <- cn.crit[i] #this is a kludge to get around a problem with data.frame renaming the variable if((!is.null(dictionary)) && !is.factor(dictionary)) {temp <- lookup(rownames(value),dictionary) value <- merge(value,temp,by="row.names",all.x=TRUE,sort=FALSE) rownames(value) <- value[,"Row.names"] value <- value[,-1] if(abs) {ord <- order(abs(value[,criterion]),decreasing=TRUE) } else {ord <- order(value[,criterion],decreasing=TRUE)} value <- value[ord,] } best[[criterion]] <- value } keys <- best2list(best) result <- list(bestItems=best,bestKeys=keys) return(result) } best2list <-function(values) { keys.list <- list() nkeys <- length(values) for (i in 1:nkeys) { temp <- rownames(values[[i]] ) rev <- which (values[[i]][1] < 0) if(length(rev) > 0 ) { temp[rev] <- paste0("-",temp[rev]) } keys.list[[i]] <- temp } names(keys.list) <- names(values) return(keys.list) } #adjusted 11/15/20 to add correlations if provided "lookupFromKeys" <- function(keys.list,dictionary,n=20,cors = NULL,sort=TRUE,suppress.names=FALSE,digits=2){ n.scales <- length(keys.list) results <- item.cors <- result.df <- list() for(i in 1:n.scales) { list.name <- names(keys.list[i]) list.i <- keys.list[[i]] keys <- rep(1,length(list.i))[1:(min(n,length(list.i)))] neg <- grep("-", list.i[1:(min(n,length(list.i)))]) keys[neg] <- -1 select <- sub("-", "", list.i) results[[i]] <- lookup(select[1:(min(n,length(list.i)))],dictionary) if(!is.null(rownames(results[[i]])[keys < 0])) rownames(results[[i]])[keys < 0] <- paste0(rownames(results[[i]])[keys<0],"-") if(!is.null(cors)) { item.cors[[i]] <- round(cors[select[1:(min(n,length(select)))],i],digits=digits) result.df[[i]] <- data.frame(results[[i]],cors=item.cors[[i]]) if(sort) { ord <- order(abs(item.cors[[i]]),decreasing=TRUE) result.df[[i]] <- result.df[[i]][ord,] } } else {result.df[[i]] <- data.frame(results[[i]])} #results[[i]] <- c(results[[i]],cors= round(cors[select[1:n],i],digits=digits)) if(suppress.names) names(results[[i]]) <- "" # names(results[i]) <- list.name } names(result.df) <- names(keys.list) return(result.df)} #lookup which x's are found in y[c1],return matches for y[] "lookup" <- function(x,y,criteria=NULL,keep.na=FALSE) { if (is.null(criteria)) {temp <- match(x,rownames(y))} else { temp <- match(x,y[,criteria])} if(any(!is.na(temp))) { y <- (y[temp[!is.na(temp)],,drop=FALSE]) } else {y <- NA} return(y)} psych/R/eigen.loadings.R0000644000176200001440000000064410771314614014622 0ustar liggesusers"eigen.loadings" <- function (x) { if(!is.null(x$vector)) { ans <- x$vectors %*% sqrt(diag(x$values)) colnames(ans) <- rownames(ans) <- rownames(x$vector) return(ans) } else if(!is.null(x$loadings)) { ans <- x$loadings %*% diag(x$sdev) rownames(ans) <- rownames(x$loadings) colnames(ans) <- colnames(x$loadings) return(ans) } } #convert eigen vectors to principal component loadings by unnormalizing them psych/R/scoreBy.r0000644000176200001440000002174114340716573013411 0ustar liggesusers#Find scores from correlations for each subject #this is a version scoreOverlap adjusted the case of many subjects #impute missing values? #The correlations for each subject are found by statsBy #and are stored as a list of matrices in the r element #The trick is how to handle missing correlations #Various fixes 1/15/22 "scoreBy" <- function(keys,stats,correct=TRUE,SMC=TRUE,av.r=TRUE,item.smc=NULL,impute=TRUE,select=TRUE,min.n=3,smooth=FALSE) { #function to score clusters according to the key matrix, correcting for item overlap cl <- match.call() if(!inherits(stats, "statsBy")) stop("Please run statsBy first. Make sure that you specified keys before stats.") if(is.null(stats$r)) stop("I am sorry, but you need to run statsBy with the cors=TRUE option first") MIN.n <- min.n Item.smc <- item.smc #we redefine later, so we need to keep this original value bad.matrix <-0 na.matrix <- 0 r.list <- stats$r #the call uses the output of statsBy Select <- select keys.list<- keys #we are saving these for each pass ngroups <-length(r.list) tol=sqrt(.Machine$double.eps) #machine accuracy var.names <- colnames(stats$r[[1]]) keys <- keys.list #these are the original keys select <- Select #the original values bad <- FALSE if(is.list(keys) & (!is.data.frame(keys))) { if (select) { select <- selectFromKeyslist(var.names,keys) # select <- sub("-","",unlist(keys)) #added April 7, 2017 select <- select[!duplicated(select)] } else {select <- 1:length(var.names) } } keys <- make.keys(stats$r[[1]][select,select],keys) #added 9/9/16 (and then modified March 4, 2017 #move outside of loop if(!is.matrix(keys)) keys <- as.matrix(keys) #keys are sometimes a data frame - must be a matrix #now, do repeated scoring, one pass for each group result <- list(Call = cl) cor.list <- list() var.list <- list() alpha.list <- list() r.list <- list() for(group in 1:ngroups) { num.n <- stats$nWg[[group]] #this is the pairwise data #if(sum(!is.na(num.n)) < 10 || mean(num.n,na.rm=TRUE) < MIN.n){ if(mean(num.n,na.rm=TRUE) < MIN.n){ cor.list[[group]] <- NA alpha.list[[group]] <- NA #add an empty element r.list[[group]] <- NA next() } r <- stats$r[[group]] r[num.n < min.n] <- NA #if (!isCorrelation(r)) {r <- cor(r[,select],use="pairwise")} else r <- r[select,select] #only use correlations #if ((dim(r)[1] != dim(r)[2]) ) {r <- cor(r,use="pairwise")} #if(any(abs(r[!is.na(r)]) > 1)) warning("Something is seriously wrong with the correlation matrix, some correlations had absolute values > 1! Please check your data.") if(any(is.na(r))) { SMC=FALSE # warning("Missing values in the correlation matrix do not allow for SMC's to be found") bad <- TRUE} if(SMC && is.null(Item.smc)) {item.smc <- smc(r)} else { diag(r) <- NA item.smc <- apply(r,1,function(x) max(abs(x),na.rm=TRUE)) item.smc[is.infinite(item.smc) ] <- 1 diag(r) <- 1} if(all(item.smc ==1)) SMC <- FALSE #item.smc <- rep(1,NCOL(r)) #why are we doing this? if(!bad) {covar <- t(keys) %*% r %*% keys} else #matrix algebra is our friend { covar<- apply(keys,2,function(x) colSums(apply(keys,2,function(x) colSums(r*x,na.rm=TRUE))*x,na.rm=TRUE)) #matrix multiplication without matrices! covar <- apply(keys,2,function(x) colSums(apply(keys,2,function(x) colMeans(r*x,na.rm=TRUE))*x,na.rm=TRUE)) *NROW(keys) # we find the means of the non-zero elements by adjusting for the number of these elements covar <- score.na(keys,r,cor=FALSE) } n.keys <- ncol(keys) item.var <- item.smc raw.r <- cov2cor(covar) # key.var <- diag(t(keys) %*% keys) #if correlations key.var <- diag(r) %*% abs(keys) #if covariances or correlations key.n <- diag(t(keys) %*% keys) var <- diag(covar) #these are the scale variances unless we handled bad data key.smc <- as.vector( t(abs(keys)) %*% item.smc ) #key.alpha <- ((var-key.var)/var)*(key.var/(key.var-1)) key.alpha <- ((var-key.var)/var)*(key.n/(key.n-1)) key.lambda6 <- (var - key.var + key.smc)/var key.alpha[is.nan(key.alpha)] <- 1 #if only 1 variable to the cluster, then alpha is undefined key.alpha[!is.finite(key.alpha)] <- 1 key.av.r <- key.alpha/(key.var - key.alpha*(key.var-1)) #alpha 1 = average r colnames(raw.r) <- rownames(raw.r) <- colnames(keys) names(key.lambda6) <- colnames(keys) key.lambda6 <- drop(key.lambda6) n.keys <- ncol(keys) sn <- key.av.r * key.var/(1-key.av.r) if(!bad) { item.cov <- t(keys) %*% r #the normal case is to have all correlations raw.cov <- item.cov %*% keys} else { # item.cov <- apply(keys,2,function(x) colSums(r*x,na.rm=TRUE)) #some correlations are NA which makes this value too low item.cov <- apply(keys,2,function(x) colSums(r*x,na.rm=TRUE)) #we use the means to fix this problem raw.cov <- apply(keys,2,function(x) colSums(item.cov*x,na.rm=TRUE)) #raw.cov <- covar item.cov <- t(item.cov) #what is going on? } adj.cov <- raw.cov #now adjust them med.r <- rep(NA, n.keys) for (i in 1:(n.keys)) { temp <- keys[,i][abs(keys[,i]) > 0] temp <- diag(temp,nrow=length(temp)) small.r <- r[abs(keys[,i])>0,abs(keys[,i])>0] small.r <- temp %*% small.r %*% temp #does not work with NA med.r[i] <- median(small.r[lower.tri(small.r)],na.rm=TRUE) for (j in 1:i) { #maybe don't adjust if(av.r) { adj.cov[i,j] <- adj.cov[j,i]<- raw.cov[i,j] - sum(keys[,i] * keys[,j] ) + sum(keys[,i] * keys[,j] * sqrt(key.av.r[i] * key.av.r[j])) } else { adj.cov[i,j] <- adj.cov[j,i] <- raw.cov[i,j] - sum(keys[,i] * keys[,j] )+ sum( keys[,i] * keys[,j] * sqrt(item.smc[i]* abs(keys[,i])*item.smc[j]*abs(keys[,j]) )) } } } scale.var <- diag(raw.cov) diag(adj.cov) <- diag(raw.cov) adj.r <- cov2cor(adj.cov) #this is the overlap adjusted correlations if(smooth) { if(any(is.na(adj.r))) {na.matrix <- na.matrix+1} else { ev <- eigen(adj.r) bad.matrix <- bad.matrix + any(ev$value<0) adj.r <- cor.smooth(adj.r) } } #adjust the item.cov for item overlap #we do this by replacing the diagonal of the r matrix with the item.var (probably an smc, perhaps a maximum value) diag(r) <- item.var if(!bad) { item.cov <- t(keys) %*% r #the normal case is to have all correlations } else { #item.cov <- t(apply(keys,2,function(x) colSums(r*x,na.rm=TRUE))) #some correlations are NA item.cov <- matrixMult.na(t(keys),r,scale=FALSE) } if(n.keys > 1) { item.cor <- sqrt(diag(1/(key.lambda6*scale.var))) %*% (item.cov) # %*% diag(1/sqrt(item.var)) rownames(item.cor) <- colnames(keys) } else { item.cor <- r %*% keys /sqrt(key.lambda6*scale.var) } colnames(item.cor) <- colnames(r) item.cor <- t(item.cor) names(med.r) <- colnames(keys) adj.r.vect <- as.vector(adj.r[lower.tri(adj.r)]) if (correct) {cluster.corrected <- correct.cor(adj.r,t(key.alpha)) r.list [[group]] <- adj.r.vect alpha.list[[group]] <- list(alpha=key.alpha) cor.list[[group]] <- list(cor=adj.r) var.list [[group]] <- list(var=var/(key.n^2)) # names(result[group]) <- names(stats$r[group]) } #correct for attenuation else { result[[group]] <- list(cor=adj.r,var=var, sd=sqrt(var),alpha=key.alpha,av.r = key.av.r,size=key.var,sn=sn,G6 =key.lambda6,item.cor=item.cor,med.r=med.r,Call=cl)} } #end of group loop names(alpha.list) <- names(cor.list) <- names(stats$r) #if(!exists("adj.r.vect")) browser() #debugging option # r.list <- r.list[!is.na(r.list)] #this gets rid of the missing cases cor.mat <- matrix(unlist(r.list),ncol=length(adj.r.vect),byrow=TRUE) var.mat <- matrix(unlist(var.list),ncol=NCOL(keys),byrow=TRUE) # rownames(cor.mat) <- names(cor.list[!is.na(cor.list)]) rownames(var.mat) <- names(cor.list[!is.na(cor.list)]) colnames(var.mat)<- colnames(keys) rownames(cor.mat) <- names(cor.list[!is.na(cor.list)]) rownames(cor.mat) <- names(cor.list) cnR <- abbreviate(colnames(keys),minlength=5) k <- 1 nvar <- NCOL(keys) temp.name <- rep(NA,nvar*(nvar-1)/2) #strange, but seems necessary for(i in 1:(nvar-1)) {for (j in (i+1):nvar) { temp.name[k] <- paste(cnR[i],cnR[j],sep="-") # colnames(cor.mat)[k] <- paste(cnR[i],cnR[j],sep="-") k<- k +1 }} colnames(cor.mat)<- temp.name result <- list(alpha=alpha.list,cor = cor.list,cor.mat=cor.mat,bad.matrix=bad.matrix, na.matrix=na.matrix,var = var.mat) class(result) <- c ("psych", "scoreBy") return(result)} #modified 01/11/15 to find r if not a square matrix #modifed 03/05/15 to do pseudo matrix multiplication in case of missing data select.from.list <- function(x){ ngroups <- length(x) result <-list() for (i in 1:ngroups) { result[[i]] <- x[[i]]$cor[lower.tri(x[[i]]$cor)]} return(result) }psych/R/sim.anova.R0000644000176200001440000000425011170172760013622 0ustar liggesusers "sim.anova" <- function ( es1 = 0, es2 = 0, es3 = 0, es12 = 0, es13 = 0, es23 = 0, es123 = 0, es11=0,es22=0, es33=0,n = 2,n1 = 2, n2 = 2, n3 = 2, within=NULL,r=.8,factors=TRUE,center = TRUE,std=TRUE) { contrasts <- function(n) { if (n%%2) { seq(-floor(n/2), floor(n)/2) } else { seq(-(n - 1), (n - 1), 2) } } if(n1 * n2 * n3) { n <- n * n1 * n2 * n3 } if(n1) { cont1 <- contrasts(n1) IV1 <- rep(cont1, n/n1)} else {IV1 <- IV1<- rnorm(n)} if(n2) {cont2 <- contrasts(n2) if (n1) { IV2 <- rep(outer(rep(1, n1), contrasts(n2)), n/(n2 * n1)) } else { IV2 <- rep(cont2,n/n2)} } else {IV2 <- rnorm(n)} if (n3) {cont3 <- contrasts(n3) if (n1) { if(n2) { IV3 <- rep(outer(rep(1, n1 * n2), contrasts(n3)), n/(n1 * n2 * n3)) } else {IV3 <- rep(outer(rep(1, n1 ), contrasts(n3)), n/(n1 * n3)) } } else {if(n2) {IV3 <- rep(outer(rep(1, n2 ), contrasts(n3)), n/(n2 * n3)) } else { IV3 <- rep(contrasts(n3),n/n3)} } } else {IV3=rnorm(n)} if(factors) {if(n1) {iv1<- factor(IV1)} else{iv1<- IV1} if(n2) {iv2<- factor(IV2)} else{iv2<- IV2} if(n3) {iv3<- factor(IV3)} else{iv3<- IV3} } if(std) {IV1 <- IV1/sd(IV1) IV2 <- IV2/sd(IV2) IV3 <- IV3/sd(IV3)} y <- es1 * IV1 + es2 * IV2 + es3 * IV3 + es12 * IV1 * IV2 + es13 * IV1 * IV3 + es23 * IV2 * IV3 + es123 * IV1 * IV2 * IV3 + es11*IV1*IV1 + es22*IV2*IV2 + es33*IV3*IV3 + rnorm(n) if(!is.null(within)) {yw <- within ny <- length(yw) y<-t(r* t(matrix(rep(y,ny),ncol=ny)) + yw + sqrt(1-r^2) * rnorm(n)) } if (!center) { IV1 <- IV1 - min(IV1) + 1 IV2 <- IV2 - min(IV2) + 1 IV3 <- IV3 - min(IV3) + 1 } if(factors) {y.df <- data.frame(IV1=iv1,IV2=iv2,IV3=iv3,DV=y)} else { y.df <- data.frame(IV1, IV2, IV3,DV=y)} } psych/R/corr.test.R0000644000176200001440000002326314527735601013667 0ustar liggesusers#modified 9/18/21 to find Kendall and Spearman probabilities "corr.test" <- "corTest" <- function(x,y=NULL,use="pairwise",method="pearson",adjust="holm",alpha=.05,ci=TRUE,minlength=5, normal=TRUE){ cl <- match.call() if(normal) { #the normal case if(is.null(y)) {r <- cor(x,use=use,method=method) sym <- TRUE n <- t(!is.na(x)) %*% (!is.na(x)) } else {r <- cor(x,y,use=use,method=method) sym=FALSE n <- t(!is.na(x)) %*% (!is.na(y))} if((use=="complete") | (min(n) == max(n))) n <- min(n) t <- (r*sqrt(n-2))/sqrt(1-r^2) #p <- 2*(1 - pt(abs(t),(n-2))) p <- -2 * expm1(pt(abs(t),(n-2),log.p=TRUE)) #suggested by Nicholas Clark se <- sqrt((1-r*r)/(n-2)) } else {temp <- prob.Kendall(x=x,y=y,method=method) if(is.null(y)) { sym <- TRUE n <- t(!is.na(x)) %*% (!is.na(x))} else {sym <- FALSE n <- t(!is.na(x)) %*% (!is.na(y))} r <- temp$r p <- temp$p se <- sqrt((1-r*r)/(n-2)) } nvar <- ncol(r) p[p>1] <- 1 pa <- p #in case we don't do adjustments if (adjust !="none") { if (is.null(y)) {lp <- upper.tri(p) #the case of a symmetric matrix (remember upper.tri =! lower tri) pa <- p[lp] pa <- p.adjust(pa,adjust) p[upper.tri(p,diag=FALSE)] <- pa } else { pa[] <- p.adjust(p,adjust) #the case of an asymmetric matrix #corrected 4/9/21 } } #find confidence intervals z <- fisherz(r[lower.tri(r)]) if(ci) { if (min(n) < 4) { warning("Number of subjects must be greater than 3 to find confidence intervals.") } if(sym) {ncors <- nvar * (nvar-1)/2} else ncors <- prod(dim(r)) if(adjust!="holm") {dif.corrected <- qnorm(1-alpha/(2* ncors)) } else { # 1- alpha/2 /nvar *(nvar-1) /2) ord <- order(abs(z),decreasing=FALSE) #to find the HOlm correction, we need to order the size of the correlations dif.corrected <- qnorm(1-alpha/(2*order(ord))) } #holm alpha <- 1-alpha/2 #the raw alpha level for confidence intervals dif <- qnorm(alpha) if(sym) { if(is.matrix(n)) { sef <- 1/sqrt(n[lower.tri(n)] - 3) } else { sef <- 1/sqrt(n - 3)} lower <- fisherz2r(z - dif * sef) upper <- fisherz2r(z + dif * sef) lower.corrected <- fisherz2r(z - dif.corrected * sef) upper.corrected <- fisherz2r(z + dif.corrected * sef) p.adj <- t(p) p.adj <- p.adj[lower.tri(p.adj)] ci <- data.frame(lower=lower,r=r[lower.tri(r)],upper=upper,p=p[lower.tri(p)]) ci2 <- data.frame(lower=lower,r=r[lower.tri(r)],upper=upper,p=p[lower.tri(p)],p.adj =p.adj) ci.adj <- data.frame(lower.adj=lower.corrected,upper.adj=upper.corrected) cnR <- abbreviate(rownames(r),minlength=minlength) cnC <- abbreviate(colnames(r),minlength=minlength) k <- 1 for(i in 1:(nvar-1)) {for (j in (i+1):nvar) { rownames(ci)[k] <- paste(cnC[i],cnR[j],sep="-") k<- k +1 }} } else { #non symmetric case n.x <- NCOL(x) n.y <- NCOL(y) z <- fisherz(r) if(adjust != "holm") {dif.corrected <- qnorm(1-(1-alpha)/(n.x * n.y)) #we have already adjust alpha by 2 } else {ord <- order(abs(z),decreasing=FALSE) #to find the HOlm correction, we need to order the size of the correlations dif.corrected <- qnorm(1-(1-alpha)/(order(ord))) } sef <- 1/sqrt(n - 3) lower <- as.vector(fisherz2r(z - dif * sef)) upper <- as.vector(fisherz2r(z + dif * sef)) lower.corrected <- fisherz2r(z - dif.corrected * sef) upper.corrected <- fisherz2r(z + dif.corrected * sef) ci <- data.frame(lower=lower,r=as.vector(r),upper=upper,p=as.vector(p)) ci2 <- data.frame(lower=lower,r=as.vector(r),upper=upper,p=as.vector(p),pa=as.vector(pa)) #adding pa breaks two other packages ci.adj <- data.frame(lower.adj=as.vector(lower.corrected),r=as.vector(r),upper.adj= as.vector(upper.corrected)) cnR <- abbreviate(rownames(r),minlength=minlength) #added minlength as a parameter than fixed to 5 5/28/18 cnC <- abbreviate(colnames(r),minlength=minlength) k <- 1 for(i in 1:NCOL(y)) {for (j in 1:NCOL(x)) { rownames(ci)[k] <- paste(cnR[j],cnC[i],sep="-") rownames(ci2)[k] <- paste(cnR[j],cnC[i],sep="-") k<- k +1 }} } } else {ci <- ci2 <- sef <- ci.adj <- NULL } stars <- addStars(r,p,digits=2) result <- list(r = r,n=n,t=t,p=p,p.adj = pa,se=se,sef=sef, adjust=adjust,sym =sym,ci=ci, ci2 = ci2,ci.adj=ci.adj,stars=stars, Call=cl) class(result) <- c("psych", "corr.test") return(result) } #modified 1/4/14 to report sample size once if they are all equal #modified 3/12/14 to report confidence intervals (suggested by Alexander Weiss) #modified 3/27/14 to correct bug detected by Clemens Fell #modified 3/27/14 to correct bug reported by Louis-Charles Vannier #modified 2/21/15 to make confidence intervals an option (incredible decrease in speed if doing cis) #modified 8/24/17 to include Bonferoni adjusted confidence intervals #modified 3/27/21 to include the adjusted p values in the long output (requested by Alexander Weiss) "corr.p" <- function(r,n,adjust="holm",alpha=.05,minlength=5,ci=TRUE) { cl <- match.call() if(missing(n)) stop("The number of subjects must be specified") sym <- FALSE t <- (r*sqrt(n-2))/sqrt(1-r^2) #p <- 2*(1 - pt(abs(t),(n-2))) p <- -2 * expm1(pt(abs(t),(n-2),log.p=TRUE)) pa <- p p[p>1] <- 1 if (adjust !="none") { if(isSymmetric(unclass(p))) {sym <- TRUE lp <- upper.tri(p) #the case of a symmetric matrix pa <- p[lp] pa <- p.adjust(pa,adjust) p[upper.tri(p,diag=FALSE)] <- pa } else { p[] <- p.adjust(p ,adjust) #the case of an asymmetric matrix sym <- FALSE} } if(ci) { nvar <- ncol(r) if(sym) {z <- fisherz(r[lower.tri(r)])} else {z <- fisherz(r) n.x <- NCOL(r) n.y <- NROW(r) if(adjust != "holm") {dif.corrected <- qnorm((1-alpha/2)/(n.x * n.y)) # adjust alpha by 2 } else {ord <- order(abs(z),decreasing=FALSE) #to find the Holm correction, we need to order the size of the correlations dif.corrected <- qnorm(1-alpha/(2*(order(ord)))) }} if (min(n) < 4) { warning("Number of subjects must be greater than 3 to find confidence intervals.") } if(sym & is.matrix(n)) { se <- 1/sqrt(n[lower.tri(n)] - 3) } else { se <- 1/sqrt(n - 3)} if(sym) { dif.corrected <- qnorm(1-alpha/(nvar*(nvar-1))) } # 1- alpha/2 /nvar *(nvar-1) /2 alpha <- 1-alpha/2 dif <- qnorm(alpha) lower <- fisherz2r(z - dif * se) upper <- fisherz2r(z + dif * se) lower.corrected <- fisherz2r(z - dif.corrected * se) upper.corrected <- fisherz2r(z + dif.corrected * se) if(sym) {ci <- data.frame(lower=lower,r=r[lower.tri(r)],upper=upper,p=p[lower.tri(p)]) ci.adj <- data.frame(lower.adj = as.vector(lower.corrected),r=r[lower.tri(r)],upper.adj=as.vector(upper.corrected))} else { ci <- data.frame(lower=as.vector(lower),r=as.vector(r),upper=as.vector(upper),p=as.vector(p)) ci.adj <- data.frame(lower.adj =as.vector( lower.corrected),r=as.vector(r),upper.adj= as.vector(upper.corrected))} cnR <- abbreviate(colnames(r),minlength=minlength) rnR <- abbreviate(rownames(r),minlength=minlength) if(sym) {k <- 1 for(i in 1:(nvar-1)) {for (j in (i+1):nvar) { rownames(ci)[k] <- paste(cnR[i],rnR[j],sep="-") k<- k +1 } } } else {k <- 1 for(i in 1:ncol(r)) {for (j in 1:nrow(r)) { rownames(ci)[k] <- paste(cnR[i],rnR[j],sep="-") k<- k +1 }} } result <- list(r = r,n=n,t=t,p=p,sym=sym,adjust=adjust,ci=ci,ci.adj = ci.adj,Call=cl)} else { result <- list(r=r,n=n,p=p,Call=cl)} class(result) <- c("psych", "corr.p") return(result) } #revised March 28, 2014 to be compatible with corr.test #revised August 28, 2017 to include holm and bonferroini adjusted confidence intervals #revised February 17, 2019 to allow cis not to be found #could be replaced with the following corr.test1 <- function(x,y=NULL,use="pairwise",method="pearson",adjust="holm",alpha=.05){ cl <- match.call() if(is.null(y)) {r <- cor(x,use=use,method=method) sym <- TRUE n <- t(!is.na(x)) %*% (!is.na(x)) } else {r <- cor(x,y,use=use,method=method) sym=FALSE n <- t(!is.na(x)) %*% (!is.na(y))} if((use=="complete") | (min(n) == max(n))) n <- min(n) result <- corr.p(r,n,adjust=adjust,alpha=alpha) result$Call<- cl class(result) <- c("psych", "corr.test") return(result) } #added April 6, 2021 in response to a request by Uthpala Pinto addStars <- function(r,p,digits=2) { symp <- symnum(p, corr = FALSE,cutpoints = c(0, .001,.01,.05, 1), symbols = c("***","**","*"," "),legend=FALSE) stars <- paste0(round(r,digits),symp) nc <- NCOL(r) nr <- NROW(r) stars <- matrix(stars,nrow=nr,ncol=nc) colnames(stars) <- colnames(r) rownames(stars) <- rownames(r) # print(stars,quote=FALSE) return(stars) } #added 9/18/21 to find Spearman and Kendall probabilities prob.Kendall <- function(x=x,y=NULL,method=c("kendall","spearman"),continuity=FALSE) { exact <- FALSE alternative <- "two.sided" nvar <- NCOL(x) if(!is.null(y)) {nvary <- NCOL(y) sym <- FALSE} else {nvary<- nvar sym <- TRUE} r <- diag(.5,nvar,nvary) p <- diag(0,nvar,nvary) if(sym) { for(i in 2:(nvar) ) { xi <- x[,i] for (j in 1:(i-1)){ yj <- x[,j] temp <- cor.test(xi,yj,method,alternative=alternative,exact=exact) r[i,j] <- temp$estimate p[i,j]<- temp$p.value } } r <- r + t(r) p <- p + t(p) } else { for (i in 1:nvar) { if(nvar > 1) {xi <- x[,i]} else {xi <- x} #the weird case of a single variable for (j in 1: nvary) { if(nvary > 1) {yj <- y[,j]} else { yj <- y} temp <- cor.test(xi,yj,method,alternative=alternative,exact=exact) r[i,j] <- temp$estimate p[i,j]<- temp$p.value }} } rownames(r) <- rownames(p) <- colnames(x) if(is.null (y) ) {colnames(r) <- colnames(p) <- colnames(x)} else {colnames(r) <- colnames(p) <- colnames(y)} result <- list(r=r,p=p) return(result) } psych/R/stats.by.r0000644000176200001440000002744713327623154013557 0ustar liggesusers"error.crosses.by" <- function (x,y,z,labels=NULL,main=NULL,xlim=NULL,ylim= NULL,xlab=NULL,ylab=NULL,pos=NULL,offset=1,arrow.len=.2,alpha=.05,sd=FALSE,...) # x and y are data frame or descriptive stats {if(is.null(x$mean)) {x <- describe.by(x,z,mat=TRUE) } xmin <- min(x$mean) xmax <- max(x$mean) if(sd) {max.sex <- max(x$sd,na.rm=TRUE) if(is.null(xlim)) {xlim=c(xmin - max.sex,xmax + max.sex) }} else {max.sex <- max(x$se,na.rm=TRUE)} if(is.null(y$mean)) {y <- describe(y)} ymin <- min(y$mean) ymax <- max(y$mean) if(sd) {max.sey <- max(y$sd,na.rm=TRUE) if(is.null(ylim)) {ylim=c(ymin - max.sey,ymax +max.sey)}} else { max.sey <- max(y$se,na.rm=TRUE) } if(is.null(xlim)) xlim=c(xmin - 2*max.sex,xmax +2*max.sex) if(is.null(ylim)) ylim=c(ymin - 2*max.sey,ymax +2*max.sey) if(is.null(main)) {if(!sd) { main = paste((1-alpha)*100,"% confidence limits",sep="") } else {main= paste("Means and standard deviations")} } if(is.null(xlab)) xlab <- "Group 1" if(is.null(ylab)) ylab <- "Group 2" plot(x$mean,y$mean,xlim=xlim,ylim=ylim,xlab=xlab,ylab=ylab,main=main,...) cix <- qt(1-alpha/2,x$n) ciy <- qt(1-alpha/2,y$n) z <- dim(x)[1] if(sd) {x$se <- x$sd y$se <- y$sd cix <- ciy <- rep(1,z) } if (is.null(pos)) {locate <- rep(1,z)} else {locate <- pos} if (is.null(labels)) {labels <- rownames(x)} if (is.null(labels)) {lab <- paste("V",1:z,sep="")} else {lab <-labels} for (i in 1:z) {xcen <- x$mean[i] ycen <- y$mean[i] xse <- x$se[i] yse <- y$se[i] arrows(xcen-cix[i]* xse,ycen,xcen+ cix[i]* xse,ycen,length=arrow.len, angle = 90, code=3,col = par("fg"), lty = NULL, lwd = par("lwd"), xpd = NULL) arrows(xcen,ycen-ciy[i]* yse,xcen,ycen+ ciy[i]*yse,length=arrow.len, angle = 90, code=3,col = par("fg"), lty = NULL, lwd = par("lwd"), xpd = NULL) text(xcen,ycen,labels=lab[i],pos=locate[i],cex=1,offset=offset) #puts in labels for all points } } "ellipse" <- function (x,y,r1,r2,...) { #code adapted from John Fox segments=51 angles <- (0:segments) * 2 * pi/segments unit.circle <- cbind(cos(angles), sin(angles)) xs <- r1 #ys <- e.size * yrange ellipse <- unit.circle ellipse[,1] <- ellipse[,1]*r1 + x ellipse[,2] <- ellipse[,2]*r2+ y #ys? lines(ellipse, ...) return(xs) } # modified 18/1/15 to pass just the xvar and yvar to statsBy # "errorCircles" <- # function (x,y,data,ydata=NULL,group=NULL,paired=FALSE, labels=NULL,main=NULL,xlim=NULL,ylim= NULL,xlab=NULL,ylab=NULL,add=FALSE,pos=NULL,offset=1,arrow.len=.2,alpha=.05,sd=FALSE,bars=TRUE,circles=TRUE,...) { # x and y are data frame or descriptive stats # # xvar <- x # yvar <- y # if(!is.null(group)) {data <- statsBy(data[,c(xvar,yvar,group)],group)} # x <- list() # if(paired) { # x$mean <- t(data$mean[,xvar]) # x$sd <- t(data$sd[,xvar]) # x$n <- t(data$n[,xvar]) # } else { #the normal case # x$mean <- data$mean[,xvar] # x$sd <- data$sd[,xvar] # x$n <- data$n[,xvar]} # # xmin <- min(x$mean,na.rm=TRUE) # xmax <- max(x$mean,na.rm=TRUE) # x$se <- x$sd/sqrt(x$n) # # if(sd) {max.sex <- max(x$sd,na.rm=TRUE) # if(is.null(xlim)) {xlim=c(xmin - max.sex,xmax + max.sex) }} else {max.sex <- max(x$se,na.rm=TRUE)} # # y <- list() # if(!is.null(ydata)) { # y$mean <- ydata$mean[,yvar] # y$sd <- ydata$sd[,yvar] # y$n <- ydata$n[,yvar] # } else { # y$mean <- data$mean[,yvar] # y$sd <- data$sd[,yvar] # y$n <- data$n[,yvar]} # # ymin <- min(y$mean,na.rm=TRUE) # ymax <- max(y$mean,na.rm=TRUE) # y$se <- y$sd/sqrt(y$n) # if(sd) {max.sey <- max(y$sd,na.rm=TRUE) # if(is.null(ylim)) {ylim=c(ymin - max.sey,ymax +max.sey)}} else { max.sey <- max(y$se,na.rm=TRUE) } # # if(is.null(xlim)) xlim=c(xmin - 2*max.sex,xmax +2*max.sex) # if(is.null(ylim)) ylim=c(ymin - 2*max.sey,ymax +2*max.sey) # # if(is.null(main)) {if(!sd) { main = paste((1-alpha)*100,"% confidence limits",sep="") } else {main= paste("Means and standard deviations")} } # if(paired) {if(is.null(xlab)) xlab <- "Group 1" # if(is.null(ylab)) ylab <- "Group 2" # } else { # if(is.null(xlab)) xlab <- colnames(data$mean)[xvar] # if(is.null(ylab)) ylab <- colnames(data$mean)[yvar] # } # if(add) { # if(paired) {points(x$mean,typ="p",...) } else {points(x$mean,y$mean,typ="p",...)} # } else { # if(paired) {plot(x$mean,xlim=xlim,ylim=ylim,xlab=xlab,ylab=ylab,main=main,typ="p",...) } else {plot(x$mean,y$mean,xlim=xlim,ylim=ylim,xlab=xlab,ylab=ylab,main=main,typ="p",...)} # } # N <-x$n # Nmax <- max(N) # cix <- qt(1-alpha/2,x$n) # ciy <- qt(1-alpha/2,y$n) # if(paired) {z <- nrow(x$mean) } else {z <- length(x$mean)} # if(sd) {x$se <- x$sd # y$se <- y$sd # cix <- ciy <- rep(1,z) # } # # if (is.null(pos)) {locate <- rep(1,z)} else {locate <- pos} # if (is.null(labels)) {labels <- rownames(x$mean)} # if (is.null(labels)) {lab <- paste("V",1:z,sep="")} else {lab <-labels} # # for (i in 1:z) # { if(paired) { xcen <- x$mean[i,1] # ycen <- x$mean[i,2] # xse <- x$se[i,1] # yse <- x$se[i,2] # } else { # xcen <- x$mean[i] # ycen <- y$mean[i] # xse <- x$se[i] # yse <- y$se[i]} # if(bars) {if(max(x$se,na.rm=TRUE) > 0) arrows(xcen-cix[i]* xse,ycen,xcen+ cix[i]* xse,ycen,length=arrow.len, angle = 90, code=3,col = par("fg"), lty = NULL, lwd = par("lwd"), xpd = NULL) # if(max(y$se,na.rm=TRUE) >0 ) arrows(xcen,ycen-ciy[i]* yse,xcen,ycen+ ciy[i]*yse,length=arrow.len, angle = 90, code=3,col = par("fg"), lty = NULL, lwd = par("lwd"), xpd = NULL) # } # # text(xcen,ycen,labels=lab[i],pos=locate[i],cex=1,offset=offset) #puts in labels for all points # if(circles) { xrange <- xlim[2] - xlim[1] # yrange <- ylim[2] - ylim[1] # xscale <-max(x$se) # yscale <-max(y$se) # ellipse(xcen,ycen,sqrt(xscale*x$n[i]/Nmax),sqrt( yscale*x$n[i]/Nmax)) # } # } # if(!is.null(group)) return(invisible(data)) # } # # # "statsBy.old" <- # function (data,group,cors=FALSE) { # # valid <- function(x) { #count the number of valid cases # sum(!is.na(x)) # } # gr <- which(colnames(data) == group) # # z1 <- data[,group] # z <- z1 # cnames <- colnames(data) # for (i in 1:ncol(data)) {if(is.factor(data[,i]) || is.logical(data[,i])) { # data[,i] <- as.numeric(data[,i]) # # colnames(data)[i] <- paste(cnames[i],"*",sep="") # }} # xvals <- list() # # xvals$mean <- t(matrix(unlist(by(data,z,colMeans,na.rm=TRUE)),nrow=ncol(data))) # xvals$sd <-t(matrix(unlist(by(data,z,function(x) sapply(x,sd,na.rm=TRUE))),nrow=ncol(data))) # xvals$n <- t(matrix(unlist(by(data,z,function(x) sapply(x,valid))),nrow=ncol(data))) # colnames(xvals$mean) <- colnames(xvals$sd) <- colnames(xvals$n) <- colnames(data) # rownames(xvals$mean) <- rownames(xvals$sd) <- rownames(xvals$n) <- levels(z) # nH <- harmonic.mean(xvals$n) # GM <- colSums(xvals$mean*xvals$n)/colSums(xvals$n) # MSb <- colSums(xvals$n*t((t(xvals$mean) - GM)^2))/(nrow(xvals$mean)-1) #weight means by n # MSw <- colSums(xvals$sd^2*(xvals$n-1))/(colSums(xvals$n-1)) #find the pooled sd # xvals$F <- MSb/MSw # N <- colSums(xvals$n) # # npr <-(N^2 - colSums(xvals$n))/(N *(nrow(xvals$n) -1)) # # npr <- harmonic.mean(xvals$n-1) # npr <- (colSums(xvals$n-1)+nrow(xvals$n))/(nrow(xvals$n)) # xvals$ICC1 <- (MSb-MSw)/(MSb + MSw*(npr-1)) # xvals$ICC2 <- (MSb-MSw)/(MSb) # if(cors) { r <- by(data,z,function(x) cor(x[-1],use="pairwise")) # nvars <- ncol(r[[1]]) # xvals$r <- r # lower <- lapply(r,function(x) x[lower.tri(x)]) # xvals$within <- t(matrix(unlist(lower),nrow=nvars*(nvars-1)/2)) # wt <- by(data,z,function(x) pairwiseCount(x[-1])) # lower.wt <- t(matrix(unlist(lapply(wt,function(x) x[lower.tri(x)]) ) ,nrow=nvars*(nvars-1)/2)) # lower.wt <- t(t(lower.wt)/colSums(lower.wt,na.rm=TRUE)) # pool <- colSums( lower.wt * xvals$within,na.rm=TRUE) # pool.sd <- apply(xvals$within, 2,FUN=sd, na.rm=TRUE) # xvals$pooled <- matrix(NaN,nvars,nvars) # xvals$pooled[lower.tri(xvals$pooled)] <- pool # xvals$pooled[upper.tri(xvals$pooled)] <- pool # diag(xvals$pooled) <- 1 # xvals$sd.r <- matrix(NaN,nvars,nvars) # xvals$sd.r[lower.tri(xvals$sd.r)] <- pool.sd # xvals$sd.r[upper.tri(xvals$sd.r)] <- pool.sd # colnames(xvals$pooled) <- rownames (xvals$pooled) <- cnames[-1] # } # # nvar <- ncol(data)-1 # xvals$raw <- cor(data,use="pairwise") # new.data <- as.matrix( merge(xvals$mean,data,by=group,suffixes =c(".bg",""))[-1]) # # diffs <- new.data[,(nvar+1):ncol(new.data)] - new.data[,1:nvar] # colnames(diff) <- rownames(diff) <- paste(colnames(diff),".wg",sep="") # xvals$rbg <- cor(new.data[,1:nvar],use="pairwise") #the between group (means) # xvals$rwg <- cor(diffs,use="pairwise") #the within group (differences) # colnames(xvals$rwg) <- rownames(xvals$rwg) <- paste(colnames(xvals$rwg),".wg",sep="") # xvals$etabg <- cor(new.data[,1:nvar],new.data[,(nvar+1):ncol(new.data)],use="pairwise") #the means with the data # xvals$etawg <- cor(new.data[,(nvar+1):ncol(new.data)],diffs,use="pairwise") #the deviations and the data # rownames(xvals$etawg) <- paste(rownames(xvals$etawg),".wg",sep="") # # return(xvals) # } # "cor.wt" <- function(data,vars=NULL, w=NULL,sds=NULL, cor=TRUE) { cl <- match.call() if(is.list(data) && !is.data.frame(data)) {w <- data$n #use the output from statsBy sds <- data$sd x <- data$mean} else {x <- data} if(!is.null(vars)) {x <- x[,vars] w <- w[,vars] sds <- sds[,vars] } if(is.null(w)) w <- matrix(rep(rep(1/nrow(x),nrow(x)),ncol(x)),nrow=nrow(x),ncol=ncol(x)) if(is.null(ncol(w))) {wt <- w/sum(w) } else { wt <- t(t(w)/colSums(w))} cnames <- colnames(x) for (i in 1:ncol(x)) {if(is.factor(x[,i]) || is.logical(x[,i])) { x[,i] <- as.numeric(x[,i]) colnames(x)[i] <- paste(cnames[i],"*",sep="") }} means <- colSums(x * wt,na.rm=TRUE) xc <- scale(x,center=means,scale=FALSE) #these are the weighted centered data if(is.null(sds)) {xs <- xc /sqrt(w) } else {xs <- xc * sds/sqrt(w)} xwt <- sqrt(wt) * xc # added February 20, 2016 to consider missing values if(any(is.na(xwt))) { cov <- apply(xwt,2, function(x) colSums(xwt * x,na.rm=TRUE)) } else {#matrix algebra without matrices cov <- crossprod(xwt) } #/(1-colSums(wt^2,na.rm=TRUE)) if(cor) {r <- cov2cor(cov)} else {r <- cov} xw <- wt * xc result <-list(r=r,xwt = xwt,wt=wt,mean=means,xc=xc,xs=xs) result$Call <- cl class(result) <- c("psych","cor.wt") return(result)} psych/R/skew.R0000644000176200001440000001013014137330425012672 0ustar liggesusers#corrected May 7, 2007 #modified October ,2011 to use apply for mean and sd #modified April, 2012 to return 3 estimates, depending upon type #partly based upon e1071 skewness and kurtosis "skew" <- function (x, na.rm = TRUE,type=3) { if (length(dim(x)) == 0) { if (na.rm) { x <- x[!is.na(x)] } sdx <- sd(x,na.rm=na.rm) mx <- mean(x) n <- length(x[!is.na(x)]) switch(type, {skewer <- sqrt(n) *( sum((x - mx)^3, na.rm = na.rm)/( sum((x - mx)^2,na.rm = na.rm)^(3/2)))}, #case 1 {skewer <- n *sqrt(n-1) *( sum((x - mx)^3, na.rm = na.rm)/((n-2) * sum((x - mx)^2,na.rm = na.rm)^(3/2)))}, #case 2 {skewer <- sum((x - mx)^3)/(n * sd(x)^3) }) #case 3 } else { skewer <- rep(NA,dim(x)[2]) if (is.matrix(x)) {mx <- colMeans(x,na.rm=na.rm)} else {mx <- apply(x,2,mean,na.rm=na.rm)} sdx <- apply(x,2,sd,na.rm=na.rm) for (i in 1:dim(x)[2]) { n <- length(x[!is.na(x[,i]),i]) switch(type, {skewer[i] <-sqrt(n) *( sum((x[,i] - mx[i])^3, na.rm = na.rm)/( sum((x[,i] - mx[i])^2,na.rm = na.rm)^(3/2)))}, #type 1 {skewer[i] <- n *sqrt(n-1) *( sum((x[,i] - mx[i])^3, na.rm = na.rm)/((n-2) * sum((x[,i] - mx[i])^2,na.rm = na.rm)^(3/2)))},#type 2 {skewer[i] <- sum((x[,i] - mx[i])^3, na.rm = na.rm)/(n * sdx[i]^3)} #type 3 ) #end switch } #end loop } return(skewer) } #modified November 24, 2010 to use an unbiased estimator of kurtosis as the default #and again April 22, 2012 to include all three types "kurtosi" <- function (x, na.rm = TRUE,type=3) { if (length(dim(x)) == 0) { if (na.rm) { x <- x[!is.na(x)] } if (is.matrix(x) ) { mx <- colMeans(x,na.rm=na.rm)} else {mx <- mean(x,na.rm=na.rm)} sdx <- sd(x,na.rm=na.rm) n <- length(x[!is.na(x)]) switch(type, {kurt <- sum((x - mx)^4, na.rm = na.rm)*n /(sum((x - mx)^2,na.rm = na.rm)^2) -3}, #type 1 { kurt <- n*(n + 1)*sum((x - mx)^4, na.rm = na.rm)/( (n - 1)*(n - 2)*(n - 3)*(sum((x - mx)^2,na.rm = na.rm)/(n - 1))^2) -3 *(n- 1)^2 /((n - 2)*(n - 3)) }, # type 2 {kurt <- sum((x - mx)^4)/(n *sdx^4) -3} ) # type 3 } else { kurt <- rep(NA,dim(x)[2]) # mx <- mean(x,na.rm=na.rm) mx <-apply(x,2 ,mean,na.rm=na.rm) if(type==3) sdx <- apply(x,2,sd,na.rm=na.rm) for (i in 1:dim(x)[2]) { n <- length(x[!is.na(x[,i]),i]) switch(type, { kurt[i] <- sum((x[,i] - mx[i])^4, na.rm = na.rm)*length(x[,i]) /(sum((x[,i] - mx[i])^2,na.rm = na.rm)^2) -3}, #type 1 { xi <- x[,i]-mx[i] kurt[i] <- n*(n + 1)*sum((x[,i] - mx[i])^4, na.rm = na.rm)/( (n - 1)*(n - 2)*(n - 3)*(sum((x[,i] - mx[i])^2,na.rm = na.rm)/(n - 1))^2) -3 *(n- 1)^2 /((n - 2)*(n - 3)) } #type 2 , { kurt[i] <- sum((x[,i] - mx[i])^4, na.rm = na.rm)/((length(x[,i]) - sum(is.na(x[,i]))) * sdx[i]^4) -3}, #type 3 {NULL}) names(kurt) <- colnames(x) }} return(kurt) } #added November 15, 2010 #adapted from the mult.norm function of the QuantPsyc package "mardia" <- function(x,na.rm=TRUE,plot=TRUE) { cl <- match.call() x <- as.matrix(x) #in case it was a dataframe if(na.rm) x <- na.omit(x) if(nrow(x) > 0) { n <- dim(x)[1] p <- dim(x)[2] x <- scale(x,scale=FALSE) #zero center S <- cov(x) S.inv <- solve(S) D <- x %*% S.inv %*% t(x) b1p <- sum(D^3)/n^2 b2p <- tr(D^2)/n chi.df <- p*(p+1)*(p+2)/6 k <- (p+1)*(n+1)*(n+3)/(n*((n+1)*(p+1) -6)) small.skew <- n*k*b1p/6 M.skew <- n*b1p/6 M.kurt <- (b2p - p * (p+2))*sqrt(n/(8*p*(p+2))) #p.skew <- 1-pchisq(M.skew,chi.df) p.skew <- -expm1(pchisq(M.skew,chi.df,log.p=TRUE)) #p.small <- 1 - pchisq(small.skew,chi.df) p.small <- -expm1(pchisq(small.skew,chi.df,log.p=TRUE)) p.kurt <- 2*(1- pnorm(abs(M.kurt))) d =sqrt(diag(D)) if(plot) {qqnorm(d) qqline(d)} results <- list(n.obs=n,n.var=p, b1p = b1p,b2p = b2p,skew=M.skew,small.skew=small.skew,p.skew=p.skew,p.small=p.small,kurtosis=M.kurt,p.kurt=p.kurt,d = d,Call=cl) class(results) <- c("psych","mardia") return(results) } else {warning("no cases with complete data, mardia quit.")} } psych/R/statsBy.boot.R0000644000176200001440000000242512723146766014340 0ustar liggesusersstatsBy.boot <- function (data,group,ntrials=10,cors=FALSE,replace=TRUE,method="pearson") { # cl <- match.call() result <- vector("list",ntrials) #supposedly allocates more memory for (i in 1:ntrials) { progressBar(i,ntrials,"statsBy") data[,group] <- sample(data[,group],size=nrow(data),replace=replace) result[[i]] <- statsBy(data,group,cors=cors,method=method) } return(result) } statsBy.boot.summary <- function(res.list,var="ICC2") { nreps <- length(res.list) nvar <- length(res.list[[1]][[var]]) cnames <- names(res.list[[1]][[var]]) temp <- matrix(NaN,ncol=nvar,nrow=nreps) colnames(temp) <- cnames for(i in 1:nreps){ temp[i,] <- res.list[[i]][[var]] } return(temp) } # crossValidate <- function(data,group,ntrials=10,cors=FALSE,replace=FALSE,method="pearson",x,y) { # cl <- match.call() # subjects <- 1:nrow(data) # sub2 <- nrow(data)/2 # result <- vector("list",ntrials) #supposedly allocates more memory # # for (i in 1:ntrials) { # progressBar(i,ntrials,"crossValidate") # samp <- sample(subjects,size =sub2,replace=replace) # resultA <- statsBy(data[samp,],group,cors=cors,method=method) # resultB <- statsBy(data[-samp,],group,cors=cors,method=method) # result[[i]] <- cor(resultA$mean,resultB, # # } # # # }psych/R/Promax.R0000644000176200001440000001631313653574146013214 0ustar liggesusers"Promax" <- function (x,m=4, normalize=FALSE, pro.m = 4) { if(missing(m)) m <- pro.m if(!is.matrix(x) & !is.data.frame(x) ) { if(!is.null(x$loadings)) x <- as.matrix(x$loadings) } else {x <- x} if (ncol(x) < 2) return(x) dn <- dimnames(x) xx <- stats::varimax(x) x <- xx$loadings Q <- x * abs(x)^(m - 1) U <- lm.fit(x, Q)$coefficients d <- try(diag(solve(t(U) %*% U)),silent=TRUE) if(inherits(d,"try-error")) {warning("Factors are exactly uncorrelated and the model produces a singular matrix. An approximation is used") ev <- eigen(t(U) %*% U) ev$values[ev$values < .Machine$double.eps] <- 100 * .Machine$double.eps UU <- ev$vectors %*% diag(ev$values) %*% t(ev$vectors) diag(UU) <- 1 d <- diag(solve(UU))} U <- U %*% diag(sqrt(d)) dimnames(U) <- NULL z <- x %*% U U <- xx$rotmat %*% U ui <- solve(U) Phi <- ui %*% t(ui) dimnames(z) <- dn class(z) <- "loadings" result <- list(loadings = z, rotmat = U,Phi = Phi) class(result) <- c("psych","fa") return(result) } #obviously a direct copy of the promax function, with the addition of returning the angles between factors #based upon a suggestion to the R-help news group by Ulrich Keller and John Fox. #added May 31st following suggestions to R-Help by Gunter Nickel "equamax" <- function(L, Tmat=diag(ncol(L)), eps=1e-5, maxit=1000) { #added kappa in the call to cfT May 3, 2020 kappa=ncol(L)/(2*nrow(L)) if(requireNamespace('GPArotation')) {GPArotation::cfT(L, Tmat=diag(ncol(L)),kappa=kappa, eps=eps, maxit=maxit)} else {stop("biquartimin requires GPArotation")}} #based completely on the GPArotation GPForth function #modified to call the varimin function which is derived from the varimax function varimin <- function(L, Tmat = diag(ncol(L)), normalize = FALSE, eps = 1e-05, maxit = 1000) { if(requireNamespace('GPArotation')) {GPArotation::GPForth(A=L,Tmat = diag(ncol(L)), normalize = normalize, eps = eps, maxit = maxit, method = "varimin") } else {stop("biquartimin requires GPArotation")}} vgQ.varimin <- function (L) { QL <- sweep(L^2, 2, colMeans(L^2), "-") list(Gq = L * QL, f = sqrt(sum(diag(crossprod(QL))))^2/4, Method = "varimin") } specialT <- specialQ <- function(L, Tmat = diag(ncol(L)), normalize = FALSE, eps = 1e-05, maxit = 1000) { write("A dummy function that can be replaced with either an orthogonal (specialT) or oblique (specialQ) call. You will need to supply it") list(NA) } #a general function to call a number of different rotation functions #meant to simplify code in fa, principal, faBy, but perhaps not ready for prime time #not yet included in the public package "faRotate" <- function(loadings,rotate="oblimin",...) { if((class(loadings)[1] == "psych") & is.list(loadings)) loadings <- loadings$loadings if (rotate=="varimax" |rotate=="Varimax" | rotate=="quartimax" | rotate =="bentlerT" | rotate =="geominT" | rotate =="targetT" | rotate =="bifactor" | rotate =="TargetT"| rotate =="equamax"| rotate =="varimin"|rotate =="specialT" | rotate =="Promax" | rotate =="promax"| rotate =="cluster" |rotate == "biquartimin" |rotate == "TargetQ" |rotate =="specialQ" ) { Phi <- NULL switch(rotate, #The orthogonal cases for GPArotation + ones developed for psych varimax = {rotated <- stats::varimax(loadings) #varimax is from stats, the others are from GPArotation loadings <- rotated$loadings}, Varimax = {if (!requireNamespace('GPArotation')) {stop("I am sorry, to do this rotation requires the GPArotation package to be installed")} #varimax is from the stats package, Varimax is from GPArotations #rotated <- do.call(rotate,list(loadings,...)) #rotated <- do.call(getFromNamespace(rotate,'GPArotation'),list(loadings,...)) rotated <- GPArotation::Varimax(loadings) loadings <- rotated$loadings} , quartimax = {if (!requireNamespace('GPArotation')) {stop("I am sorry, to do this rotation requires the GPArotation package to be installed")} #rotated <- do.call(rotate,list(loadings)) rotated <- GPArotation::quartimax(loadings) loadings <- rotated$loadings} , bentlerT = {if (!requireNamespace('GPArotation')) {stop("I am sorry, to do this rotation requires the GPArotation package to be installed")} #rotated <- do.call(rotate,list(loadings,...)) rotated <- GPArotation::bentlerT(loadings) loadings <- rotated$loadings} , geominT = {if (!requireNamespace('GPArotation')) {stop("I am sorry, to do this rotation requires the GPArotation package to be installed")} #rotated <- do.call(rotate,list(loadings,...)) rotated <- GPArotation::geominT(loadings) loadings <- rotated$loadings} , targetT = {if (!requireNamespace('GPArotation')) {stop("I am sorry, to do this rotation requires the GPArotation package to be installed")} #rotated <- do.call(rotate,list(loadings,...)) rotated <- GPArotation::targetT(loadings) loadings <- rotated$loadings} , bifactor = {loadings <- bifactor(loadings)$loadings}, #the next four solutions were not properly returning the values TargetT = {loadings <- TargetT(loadings,...)$loadings}, equamax = {loadings <- equamax(loadings)$loadings}, varimin = {loadings <- varimin(loadings)$loadings}, specialT = {loadings <- specialT(loadings)$loadings}, Promax = {pro <- Promax(loadings) loadings <- pro$loadings Phi <- pro$Phi }, promax = {pro <- Promax(loadings) loadings <- pro$loadings Phi <- pro$Phi }, cluster = {loadings <- varimax(loadings)$loadings pro <- target.rot(loadings) loadings <- pro$loadings Phi <- pro$Phi}, biquartimin = {ob <- biquartimin(loadings,) loadings <- ob$loadings Phi <- ob$Phi}, TargetQ = {ob <- TargetQ(loadings,...) loadings <- ob$loadings Phi <- ob$Phi}, specialQ = {ob <- specialQ(loadings,...) loadings <- ob$loadings Phi <- ob$Phi}) } else { #The following oblique cases all use GPArotation if (rotate =="oblimin"| rotate=="quartimin" | rotate== "simplimax" | rotate =="geominQ" | rotate =="bentlerQ" |rotate == "targetQ" ) { if (!requireNamespace('GPArotation')) {warning("I am sorry, to do these rotations requires the GPArotation package to be installed") Phi <- NULL} else { ob <- try(do.call(getFromNamespace(rotate,'GPArotation'),list(loadings,...))) if(inherits(ob,as.character("try-error"))) {warning("The requested transformaton failed, Promax was used instead as an oblique transformation") ob <- Promax(loadings)} loadings <- ob$loadings Phi <- ob$Phi} } else {message("Specified rotation not found, rotate='none' used")} } result <- list(loadings=loadings,Phi=Phi) class(result) <- c("psych","fa") return(result) } psych/R/sim.congeneric.R0000644000176200001440000000523114340714762014640 0ustar liggesusers "congeneric.sim" <- function(loads = c(0.8, 0.7, 0.6, 0.5),N = NULL, err=NULL, short=TRUE) { n <- length(loads) loading <- matrix(loads, nrow = n) error <- diag(1, nrow = n) if (!is.null(err)) {diag(error) <- err} else { diag(error) <- sqrt(1 - loading^2) } pattern <- cbind(loading, error) colnames(pattern) <- c("theta", paste("e", seq(1:n), sep = "")) rownames(pattern) <- c(paste("V", seq(1:n), sep = "")) model <- pattern %*% t(pattern) if(!is.null(N)) {latent <- matrix(rnorm(N * (n + 1)), ncol = (n + 1)) observed <- latent %*% t(pattern) colnames(latent) <- c("theta", paste("e", seq(1:n), sep = "")) if(short) model <- cor(observed) } if (short) {return(model)} else {result <- list(model=model,pattern=pattern,r=cor(observed),latent=latent,observed=observed,N=N) class(result) <- c("psych","sim") return(result)} } "sim.congeneric" <- function(loads = c(0.8, 0.7, 0.6, 0.5),N = NULL, err=NULL, short=TRUE,categorical=FALSE, low=-3,high=3,cuts=NULL, minor=FALSE,fsmall = c(-.2,.2)) { n <- length(loads) fs <- 0 loading <- matrix(loads, nrow = n) if(minor) { fsmall <- c(fsmall,rep(0,n/4)) fs <- matrix(sample(fsmall,n*floor(n/2),replace=TRUE),ncol=floor(n/2)) loading <- cbind(loading,fs)} error <- diag(1, nrow = n ) if (!is.null(err)) {diag(error) <- err} else { diag(error) <- sqrt(1 - rowSums(loading^2)) } pattern <- cbind(loading, error) if(minor) { colnames(pattern) <- c("theta",paste0("minor",1:NCOL(fs)), paste0("e", seq(1:n)) ) } else {colnames(pattern) <- c("theta", paste0("e", seq(1:n))) } rownames(pattern) <- c(paste("V", seq(1:n), sep = "")) model <- pattern %*% t(pattern) if(!is.null(N)) {latent <- matrix(rnorm(N * (n + 1 +minor*NCOL(fs))), ncol = (n + 1 +minor*NCOL(fs))) observed <- latent %*% t(pattern) if(is.null(cuts)) { if (categorical) { observed = round(observed) #round all items to nearest integer value observed[(observed<= low)] <- low observed[(observed>high) ] <- high } } else { temp <- observed ncuts <- length(cuts) temp[(observed<= cuts[1])] <- 1 if(ncuts > 1) {for (nc in 2:ncuts) {temp[(observed > cuts[nc-1]) & (observed <= cuts[nc])] <- nc}} temp[(observed > cuts[ncuts])] <- ncuts+1 observed <- temp-1 } colnames(latent) <- colnames(pattern) #c("theta", paste("e", seq(1:n), sep = "")) if(short) model <- cor(observed) } if (short) {return(model)} else { if(!is.null(N)) { result <- list(model=model,pattern=pattern,r=cor(observed),latent=latent,observed=observed,N=N) } else { result<- model} class(result) <- c("psych","sim") return(result)} } psych/R/misc.R0000644000176200001440000006112214551342736012673 0ustar liggesusers#A number of useful helper functions #added January, 2012 #most are public, some are local just for me #a dummy function to allow the help to find misc "psych.misc" <- function() {} "lowerMat" <- function(R,digits=2,minlength=5) { lowleft <- lower.tri(R,diag=TRUE) nvar <- ncol(R) nc <- digits+3 width <- getOption("width") k1 <- width/(nc+2) if(is.null(colnames(R))) {colnames(R) <- paste("C",1:nvar,sep="")} if(is.null(rownames(R))) {rownames(R) <- paste("R",1:nvar,sep="")} colnames(R) <- abbreviate(colnames(R),minlength=minlength) nvar <- ncol(R) nc <- digits+3 #k1 <- width/(nc+2) if(k1 * nvar < width) {k1 <- nvar} k1 <- floor(k1) if(!is.character(R) ) { fx <- format(round(R,digits=digits))} else {fx <- format(R)} if(nrow(R) == ncol(R) ) {fx[!lowleft] <- ""} for(k in seq(0,nvar,k1)) { if(k 0) { rown <- names(temp)[-which(rownn==TRUE)] } else {rown <- names(temp) } result <- t(matrix(unlist(temp),nrow=nvar)) colnames(result) <- cname rownames(result) <- rown } return(result)} "rmssd" <- function(x,group=NULL,lag=1,na.rm=TRUE) { return(sqrt(mssd(x,group=group,lag=lag,na.rm=na.rm))) } ##### Added March 1, 2017 "autoR" <- function(x,group=NULL,lag=1,na.rm=TRUE,use="pairwise") { if(is.null(group)) { n.obs <- NROW(x) if(is.vector(x)) { x <- as.vector(scale(x,scale=FALSE)) mssd <- sum(diff(x,lag=lag,na.rm=na.rm)^2,na.rm=na.rm)/(sum(!is.na(x))-lag) v1 <- sd(x[1:(n.obs-lag)],na.rm=na.rm)^2 v2 <- sd(x[(lag+1):n.obs],na.rm=na.rm)^2 # r <- -(mssd - v1 - v2)/(2*sqrt(v1*v2)) r <- cor(x[1:(n.obs-lag)],x[(lag+1):n.obs],use=use) result <- list(autoR = r,rssd=sqrt(mssd)) #fixed May 10 ,2017 to correct autorR- } else { x <- as.matrix(x) n <- colSums(!is.na(x)) mssd <- colSums(diff(x,lag=lag,na.rm=na.rm)^2,na.rm=na.rm)/(n-lag) v1 <- apply(x[1:(n.obs-lag),,drop=FALSE],2,sd, na.rm=na.rm)^2 v2 <- apply(x[(lag+1):n.obs,,drop=FALSE],2, sd,na.rm=na.rm)^2 # r <- -(mssd - v1 - v2)/(2*sqrt(v1*v2)) r <- diag(cor(x[1:(n.obs-lag),],x[(lag+1):n.obs,],use=use)) result <- list(autoR = r,rssd=sqrt(mssd)) } } else { cl <- match.call() x <- as.matrix(x) #added 26/5/14 if(NROW(group) != NROW(x)) group <- x[,group] #added 26/5/14 nvar <- ncol(x) cname <- colnames(x) temp <- by(x,group, autoR,na.rm=na.rm,lag=lag) rownn <- lapply(temp,is.null) if(sum(as.integer(rownn)) > 0) { rown <- names(temp)[-which(rownn==TRUE)] } else {rown <- names(temp) } tm <- t(matrix(unlist(temp),nrow=nvar*2)) autoR <- tm[,1:nvar] rmssd <- tm[,(nvar+1):(nvar*2)] colnames(autoR) <- colnames(rmssd) <- cname rownames(autoR) <- rownames(rmssd) <- rown result <- list(autoR = autoR,rmssd=rmssd,Call=cl) } class(result) <- c("psych","autoR") return(result)} ##### sim.mssd <- function(n,r,g=.1) { rw <- rnorm(n,sqrt(1/(1-r^2))) x <- xg <- rep(0,n) for(i in 2:n) {x[i] <- r*x[i-1] + rw[i] xg[i] <- x[i] + g*i } rx <- sample(x,n,replace=FALSE) x2 <- x*2 rx2 <- rx*2 x.df <- data.frame(x,rx,x2,rx2,xg) return(x.df)} #shannon complexity index "shannon" <- function(x,correct=FALSE,base=2) {if(is.null(dim(x))) { t <- table(x) s <- sum(t) p <- t/s H <- -sum(p * log(p,base)) if(correct) { Hmax <- -log(1/length(p),base) H <- H/Hmax} } else { H <- apply(x,2,function(x) shannon(x, correct=correct, base=base))} return(H) } test.all <- function(p) { library(p,character.only=TRUE) ob <- paste("package",p,sep=":") ol <- objects(ob) nf <- length(ol) for(i in 1:nf) { fn <- as.character(ol[[i]]) example(topic=fn,package=p,character.only=TRUE) } detach(ob,character.only=TRUE) } #lookup a set of items from a bigger set lookupItem <- function(x,y) { n.look <- NROW(x) possible <- list() y <- as.character(y) x <- as.character(x) for(i in 1:n.look){ temp <- grep(x[i],y) if(length(temp)>0) possible[i]<- temp } return(possible) } #lookup which x's are found in y[c1],return matches for y[] "lookup" <- function(x,y,criteria=NULL) { if (is.null(criteria)) {temp <- match(x,rownames(y))} else { temp <- match(x,y[,criteria])} if(any(!is.na(temp))) { y <- (y[temp[!is.na(temp)],,drop=FALSE]) } else {y <- NA} return(y)} #use lookup to take fa/ic output and show the results #modified July 4, 2017 to allow for omega output as well #modified June 23, 2018 to limit to top n items per factor and abs(loading) > cut #modified April 30, 2020 to include the ability to handle bassAckward output "fa.lookup" <- function(f,dictionary=NULL,digits=2,cut=.0,n=NULL,sort=TRUE) { omega <- bassAck <- none <- lavaan <- NA #weird requirement to avoid being global if(length(class(f)) > 1){ obnames <- cs(omega, fa, principal, iclust,bassAck, none) value <- inherits(f, obnames, which=TRUE) if (any(value > 1)) {value <- obnames[which(value >0)]} else {value <- "none"}} if(sort & (value !="bassAck")) {f <- fa.sort(f)} switch(value, omega = {f <- f$schmid$sl h2 <- NULL}, fa = { h2 <- f$communality com <- f$complexity f <- f$loading }, principal= { h2 <- f$communality com <- f$complexity f <- f$loading}, iclust = {f <- f$loadings h2 <- NULL}, bassAck = {nlevels <- length(f$bass.ack) h2 <- NULL ord1 <- rownames(fa.sort(f$bass.ack[[nlevels-1]])) f <- f$bass.ack[[nlevels]] f <- f[,ord1] f <- fa.sort(f) colnames(f) <- paste0("F",1:ncol(f)) }, none = {f <- f h2 <- NULL}) n.fact <- NCOL(f) ord <- rownames(f) old.names <- ord ord <- sub("-","",ord) rownames(f) <- ord if(!is.null(dictionary)) { contents <- lookup(rownames(f),dictionary)} else {message("fa.lookup requires a dictionary, otherwise just use fa.sort")} if(!is.null(h2)) {results <- data.frame(round(unclass(f),digits=digits),h2=round(h2,digits=digits),com=round(com,digits=digits))} else { results <- data.frame(round(unclass(f),digits=digits))} results <- merge(results,contents,by="row.names",all.x=TRUE,sort=FALSE) rownames(results) <- results[,"Row.names"] results <- results[ord,-1] #now put it back into the correct order rownames(results) <- old.names if(!is.null(n)) { rn <-rownames(results) results <- cbind(results,rn) f2c <- table(apply(abs(results[1:n.fact]),1,which.max)) #which column is the maximum value k <- 1 j <- 1 for(i in 1:n.fact) { results[k:(k+min(n,f2c[i])),] <- results[j:(j+ min(n,f2c[i])),] k <- (k+min(n,f2c[i])) j <- j + f2c[i] } results <- results[1:(k-1),] rownames(results) <- results[,"rn"] results <- results[,-NCOL(results)] } if(cut > 0) { r.max <- apply(abs(results[,1:n.fact]),1,max) results <- results[abs(r.max) > cut,] } return(results)} #revised 07/07/18 to add the cluster option #revised 12/07/18 to allow for simple matrix input #read a matrix, return a matrix #read a list, return a list #modified 9/2/20 for the case of not full rank cluster scores "fa.organize" <- function(fa.results,o=NULL,i=NULL,cn=NULL,echelon=TRUE,flip=TRUE) { if(is.null(names(fa.results)) ) {temp <- fa.results #the matrix form if(flip) { total.load <-colSums(temp) flipper <- sign(total.load) flipper[flipper==0] <-1 temp <- t( t(temp) * flipper ) } if(!is.null(o)) {temp <- temp[,o]} if(!is.null(i)) {temp <-temp[i,]} fa.results <- temp nf <- ncol(temp)} else { # the list form nf <- ncol(fa.results$loadings) if(echelon & is.null(o) ) {temp <- apply(abs( fa.results$loadings),1,which.max) nf <- ncol(fa.results$loadings) nvar <- nrow(fa.results$loadings) o <- rep(NA,nf) k <- 1 o[k] <- temp[k] for (ki in 2:nvar) {if (!(temp[ki] %in% o[1:k])) {o[k+1] <- temp[ki] k <- k + 1} } } #now, a kludge for the case where there are some empty columns n.good <- sum(1:nf %in% o) if (n.good < nf) { tt <- 1:nf o[(n.good + 1):nf] <- tt[!1:nf %in% o] } if(flip) { total.load <- colSums(fa.results$loadings) flipper <- sign(total.load) flipper[flipper==0] <-1 } else { flipper <- rep(1,NCOL(fa.results$loadings)) } fa.results$loadings <- t(t(fa.results$loadings) * flipper) if(!is.null(o)) {fa.results$loadings <- fa.results$loadings[,o] flipper <- flipper[o] fa.results$Structure <- t(t(fa.results$Structure[,o]) * flipper) fa.results$valid <- t(t(fa.results$valid[o])*flipper) fa.results$score.cor <- fa.results$score.cor[o,o] fa.results$r.scores <- fa.results$r.scores[o,o] fa.results$R2 <- fa.results$R2[o] if(!is.null(cn)) {colnames(fa.results$loadings) <- cn} fa.results$Phi <- fa.results$Phi[o,o]} if(!is.null(i)) {fa.results$loadings <- fa.results$loadings[i,] fa.results$Structure <- fa.results$Structure[i,] fa.results$weights <- fa.results$weights[i,] fa.results$complexity=fa.results$complexity[i] fa.results$uniquenesses <- fa.results$uniquenesses[i]} } return(fa.results) } #fixed 13/6/14 to solve the missing data problem "con2cat" <- function(old,cuts=c(0,1,2,3),where) { new <- old nc <- length(cuts) if(missing(where)) where <- 1:ncol(old) lw <- length(where) if(is.matrix(cuts)) {mcuts <- cuts} else {mcuts <- matrix(rep(cuts,lw),nrow=lw,byrow=TRUE)} vwhere <- as.vector(where) for (w in 1:lw) {where <- vwhere[w] cuts <- mcuts[w,] nc <- length(cuts) if(nc < 2 ) {new[(!is.na(new[,where]) & ( old[,where] <= cuts)),where] <- 0 new[(!is.na(new[,where]) & ( old[,where] > cuts)),where] <- 1} else { new[(!is.na(new[,where]) & ( old[,where] <= cuts[1])),where] <- 0 for(i in(2:nc)) { new[(!is.na(new[,where]) & ( old[,where] > cuts[i-1] )),where] <- i-1 # & (new[(!is.na(new[,where]) & ( old[,where] > cuts[i-1] )),where]),where] <- i-1 } new[(!is.na(new[,where]) & ( old[,where] > cuts[nc])),where] <- nc } } new} "keys.lookup" <- function(keys.list,dictionary) { if(is.list(keys.list)) { items <- sub("-","",unlist(keys.list)) f <- make.keys(items,keys.list)} keys.list <- fa.sort(f) contents <- lookup(rownames(f), y=dictionary) rownames(contents)[rowSums(f) <0 ] <- paste0(rownames(contents)[rowSums(f)<0],"-") return(contents) } #created 1/15/24 "itemSort" <- function(m, dictionary, ascending=TRUE, digits = 2) { contents <- lookup(rownames(m), y=dictionary) results <- data.frame(means=round(m,digits=digits),contents) results <- psychTools::dfOrder(results,ascending=ascending) return(results) } #patched 1/15/24 to give means "item.lookup" <- function (f,m, dictionary,cut=.3, digits = 2) { # f <- fa.sort(f) none<- NULL #A strange requirement of R 4.0 if(length(class(f)) > 1){ obnames <- cs(omega, fa, principal, iclust, none) value <- inherits(f, obnames, which=TRUE) if (any(value > 1)) {value <- obnames[which(value >0)]} else {value <- "none"} f <- fa.sort(f) } else {value <- "none"} old.names <- NULL switch(value, omega = {f <- f$schmid$sl h2 <- NULL old.names <- rownames(f) rownames(f) <- sub("-","",old.names)}, fa = { h2 <- f$communality com <- f$complexity f <- f$loading }, principal= { h2 <- f$communality com <- f$complexity f <- f$loading}, iclust = {f <- f$loadings h2 <- NULL}, none = {f <- f h2 <- NULL}) if (!(is.matrix(f) || is.data.frame(f))) { h2 <- f$communality com <- f$complexity ord <- rownames(f$loadings) nfact <- ncol(f$loadings) f <- f$loadings } else { h2 <- NULL com <- NULL ord <- rownames(f) nfact <- ncol(f) } if(!is.null(dim(m))) { means <- m[ord,,drop=FALSE] #fixed 1/15/24 } else {means <- m} f <- data.frame(unclass(f),means=means) contents <- lookup(rownames(f), y=dictionary) if (!is.null(h2)) { results <- data.frame(round(f, digits = digits), com = round(com, digits = digits), h2 = round(h2, digits = digits)) } else { results <- data.frame(round(f, digits = digits)) } results <- merge(results, contents, by = "row.names", all.x = TRUE, sort = FALSE) rownames(results) <- results[, "Row.names"] results <- results[ord, -1] if(!is.null(old.names)) rownames(results) <- old.names res <- results # res <- results[0,] #make an empty data frame of the structure of results # for (i in 1:nfact) { temp <- results[abs(results[,i]) > cut,] # ord <- order(temp[,"means"]) # res <- rbind(res,temp[ord,]) # } return(res) } lmCorLookup <- setCorLookup <- function(x,dictionary=NULL,cut=0,digits=2,p=.05) { coef <- x$coefficients probs <- x$Probability labels <- dictionary[rownames(coef),,drop=FALSE] coef[probs > p] <- NA result <- list() nvar <- NCOL(coef) for (i in 1:nvar) { ord <- order(abs(coef[,i]),decreasing=TRUE) temp <- cbind(coef=round(coef[ord,i],digits),labels[ord,]) result[[i]] <- data.frame(temp[!is.na(temp[,1]),]) } names(result) <- colnames(coef) result } "lookupItems" <- function(content=NULL,dictionary=NULL,search=c("Item","Content","item")) { location <-which(colnames(dictionary) %in% search) value <- grep(content,dictionary[,location]) dictionary[value,,drop=FALSE] } "falsePositive" <- function(sexy=.1,alpha=.05,power=.8) { pf <- alpha * (sexy) vp <- power * (1-sexy) pf/(pf+vp)} "build.html.help" <- function(p="psych",fn = "/Volumes/Test/psych/man",out="/Volumes/Test/help/") { db <- list.files(fn) for (f in db) {tools::Rd2HTML(paste(fn,db[f]),out=paste(out,db[f]),package=p) } } "bullseye" <- function(x,y,n) { for(i in 1:n) {dia.ellipse(x,y,e.size=i)}} "rel.val" <- function(n,sdx=.2,bars=TRUE,arrow.len=.05) { if(n>20) {pc <- "."} else {pc <- 16} plot(NA,xlim=c(0,10),ylim=c(0,10),axes=FALSE,xlab="",ylab="",main="Reliability and Validity as target shooting") #Reliable and valid x=3 y=2 bullseye(x,y,4) x1 <- x + rnorm(n,0,sdx) y1 <- y + rnorm(n,0,sdx) xm <- mean(x1) ym <- mean(y1) points(x1,y1,pch=pc) points(xm,ym,pch=20,col="red") if(bars) error.crosses(x1,y1,add=TRUE,arrow.len=arrow.len,labels="") text(x,y-2,"Reliable and valid") #unReliable and invalid x=7 y=7 bullseye(x,y,4) x1 <- x + rnorm(n,1,1) y1 <- y + rnorm(n,1,1) xm <- mean(x1) ym <- mean(y1) points(x1,y1,pch=pc) points(xm,ym,pch=20,col="red") if(bars) error.crosses(x1,y1,add=TRUE,arrow.len=arrow.len,labels="") text(x,y-2,"Unreliable and Invalid") #reliable and invalid x=7 y=2 bullseye(x,y,4) x1 <- x + rnorm(n,1,sdx) y1 <- y + rnorm(n,1,sdx) xm <- mean(x1) ym <- mean(y1) points(x1,y1,pch=pc) points(xm,ym,pch=20,col="red") if(bars)error.crosses(x1,y1,add=TRUE,arrow.len=arrow.len,labels="") text(x,y-2,"Reliable and Invalid") #unreliable, but valid x=3 y=7 bullseye(x,y,4) x1 <- x + rnorm(n,0,1) y1 <- y + rnorm(n,0,1) xm <- mean(x1) ym <- mean(y1) points(x1,y1,pch=pc) points(xm,ym,pch=20,col="red") if(bars) error.crosses(x1,y1,add=TRUE,arrow.len=arrow.len,labels="") text(x,y-2,"Unreliable but Valid") } #rel.val(10,.5) # "cor2" <- function(x,y,digits=2,use="pairwise",method="pearson") { # R <- cor(x,y,use=use,method=method) # print(round(R,digits)) # invisible(R)} #finally added the char2numeric so it will not choke on character variables 1/3/21 #added the cor option 11/18/23 "cor2" <- function(x,y=NULL,digits=2,use="pairwise",method="pearson",cor="cor",show=TRUE) { multi <- FALSE if(is.list(x) && is.null(y)) {multi <- TRUE n <- length(x) xi <- x[[1]] for (i in 2:n) {xi <- cbind(xi,x[[i]])} switch(cor, cor= {R <- cor(xi,use=use,method=method) }, tetrachoric = {R <- tetrachoric(xi)$rho}, polychoric= {R <- polychoric(xi)$rho}, cov = {R <- cov(xi, use=use)}, mixed = {R <- mixedCor(xi)$rho} ) }else { x <- char2numeric(x) y <- char2numeric(y) switch(cor, cor= {R <- cor(x,y,use=use,method=method)}, tetrachoric = {R <- tetrachoric(x,y)$rho}, polychoric= {R <- polychoric(x,y)$rho}, cov = {R <- cov(x,y, use=use)}, mixed = {R <- mixedCor(cbind(x,y))$rho} ) } if(show) { if(multi) {lowerMat(R,digits) } else {print(round(R,digits))} } invisible(R)} levels2numeric <- function(x) { n.var <- ncol(x) for(item in 1:n.var) { if (is.factor(x[,item])) x[,item] <- as.numeric(x[,item])} invisible(x) } signifNum <- function(x,digits=2) { if(!is.null(ncol(x))) {sign <- rep(1,prod(dim(x)))} else { sign <- rep(1,length(x))} sign[which(x < 0)] <- -1 base <- trunc(log10(sign*x)) mantissa <- x/10^base pretty <- round(mantissa,digits=digits-1) * 10^base pretty[which ((sign * x) == 0,arr.ind=TRUE)] <- 0 #fix the ones that are -Inf pretty} #October 25, 2016 #June 18 2020 added as.factor to convert character strings that are not stored as factors #this has a downsize that it converts numbers stored as characters to factors (see nchar2numeric) #added the flag option and the change the colname option 1/2/21 "isAllNumeric" <- function(x){ nvar <- NCOL(x) bad <- rep(0,nvar) for(i in 1:nvar) { if(!is.numeric(x[[i]]) ){if(is.factor(unlist(x[[i]]))| is.character(unlist(x[[i]])) ) bad [i] <- 1 }} if(prod(bad)==0) {result <- TRUE} else {result<- bad} invisible(result) } "char2numeric" <- function(x,flag=TRUE) { nvar <- NCOL(x) for(i in 1:nvar) { if(!is.numeric(x[[i]] )) { if(is.factor(unlist(x[[i]])) | is.character(unlist(x[[i]]))) { x[[i]] <- as.numeric(as.factor(x[[i]])) } else {x[[i]] <- NA} if(flag) colnames(x)[i] <- paste0(colnames(x)[i],"*") } } invisible(x)} #added June 25, 2020 to handle the case of numeric data stored as characters #added flag and colnames option 1/2/21 "nchar2numeric" <- function(x,flag=TRUE) { nvar <- NCOL(x) for(i in 1:nvar) { if(!is.numeric(x[[i]] )) { if(is.factor(unlist(x[[i]])) | is.character(unlist(x[[i]]))) { if(is.factor(unlist(x[[i]]))) { x[[i]] <- as.numeric(as.factor(x[[i]])) } else {x[[i]] <- as.numeric(x[[i]])} } else {x[[i]] <- NA} if(flag) colnames(x)[i] <- paste0(colnames(x)[i],"*") } } invisible(x)} #this just shows if it is a matrix is symmetric and has diagonals of 1 #Added the unclass to handle a problem with class partial.r 4/10/21 "isCorrelation" <- function(x,na.rm=FALSE) {value <- FALSE if(NROW(x) == NCOL(x)) { if( is.data.frame(x)) {if(isSymmetric(unclass(unname(as.matrix(x))))) { value <- TRUE}} else {if(isSymmetric(unclass(unname(x)))) {value <- TRUE}} value <- value && isTRUE(all.equal(prod(diag(as.matrix(x))),1) ) if(!value && (na.rm) && any(is.na(diag(as.matrix(x))))) stop("Although the matrix is symmetric, one of the elements of the diagonal is NA. Check your data.") value <- value && isTRUE((min(x,na.rm=TRUE)>= -1) & (max(x,na.rm=TRUE) <= 1)) } return(value)} #this just shows if it is a symmetric matrix "isCovariance" <- function(x) {value <- FALSE if(NROW(x) == NCOL(x)) { if( is.data.frame(x)) {if(isSymmetric(unclass(unname(as.matrix(x))))) { value <- TRUE}} else {if(isSymmetric(unclass(unname(x)))) {value <- TRUE}}} # value <- value && isTRUE(all.equal(prod(diag(as.matrix(x))),1) ) #don't check for diagonal of 1 return(value)} #cs is taken from Hmisc:::Cs cs <- function(...) {as.character(sys.call())[-1]} #acs is modified to produce a single string acs <- function(...) {gsub(",","",toString(sys.call()[-1]))} fromTo <- function(data,from,to=NULL) {cn <- colnames(data) if(is.null(to)) {to <- from[2] from <- from[1]} from <- which(cn == as.character(from)) to = which(cn == as.character(to)) select <- from:to return(data[select])} #flip -- not public but useful trick flip <- function(R,key) {#reverse correlations with keys < 0 R[key<0,] <- -R[key < 0,] R[,key<0] <- -R[,key < 0] return(R)} #do matrix multiplication with missing values matMult <- function(x,y) { #matrix multiplication without matrices! nvar1 <- NCOL(x) nvar2 <- NROW(y) M <- matrix(NA,ncol =NCOL(y),nrow=NROW(x)) if(NCOL(x) != NROW(y)) {stop("incompatible dimensions")} for(i in 1:NROW(x)) { for (j in 1:NCOL(y)) { M[i,j] <- sum(x[i,] * y[,j], na.rm=TRUE) } } return(M) } factorScoresSapa <- function(weights,items) { nf <- NCOL(weights) scores<- matrix(NA,nrow=NROW(items),ncol=nf) n.obs <- NROW(items) n.items <- NCOL(items) titem <- t(items) for (factors in (1:nf)) { scores[,factors] <-t(colMeans( weights[,factors] * titem, na.rm=TRUE)) } scores<- scale(scores) #return standardized values return(scores) } countBad <- function(r.list) { n <- length(r.list) count <- 0 for (i in 1:n) { ev <-eigen(r.list[[i]]) bad <- any(ev$values < 0) count <- count + bad} return(count) } #Create SAPA data by sampling SAPAfy <- function(x,y) { n_vars <- ncol(x) n_questions_unanswered <- round(n_vars - y) # in case someone inputs a double. for (xxx in 1:nrow(x)) { x[xxx, sample.int(n_vars, n_questions_unanswered) ] <- NA } return(x) } psych/R/irt.person.rasch.R0000644000176200001440000000162310474443510015132 0ustar liggesusers#steps towards an IRT program #we find the difficulties using ir.item.diff.rasch #now estimate the thetas #Then, to find the person parameters, use optimize "irt.person.rasch" <- function(diff,items) { # #the basic one parameter model irt <- function(x,diff,scores) { fit <- -1*(log(scores/(1+exp(diff-x)) + (1-scores)/(1+exp(x-diff)))) mean(fit,na.rm=TRUE) } # diff<- diff items <-items num <- dim(items)[1] fit <- matrix(NA,num,2) total <- rowMeans(items,na.rm=TRUE) count <- rowSums(!is.na(items)) for (i in 1:num) { if (count[i]>0) {myfit <- optimize(irt,c(-4,4),diff=diff,scores=items[i,]) #how to do an apply? fit[i,1] <- myfit$minimum fit[i,2] <- myfit$objective #fit of optimizing program } else { fit[i,1] <- NA fit[i,2] <- NA } #end if else } #end loop irt.person.rasch <-data.frame(total,theta=fit[,1],fit=fit[,2],count)}psych/R/score.irt.r0000644000176200001440000010126114547536246013715 0ustar liggesusers#revised August 31, 2012 to allow for estimation of all 0s or all 1s #modified March 10, 2016 to allow for quasi Bayesian estimates using normal theory #which doesn't seem to do what I want to do, so we are not doing it #June 30-July 7 , 2016 Trying to make it faster by parallelizing the code and #reducing the number of items to score when using keys. #uses local minima -- problematic for small number of items #corrected various problems with finding total (sum) scores 7/4/16 #starting to make parallel for speed #seems to result in at least a factor of 2 improvement #when using stats from fa, the discrim parameter are not necessarily in the order from a keyslist #this is only a problem if selecting items from a larger set of items #fixed this August 21, 2016 #the irt.2 function (dichotomous items) iis much slower than the polytomous solution #probably because we took parallelization one step too far #I have not removed that extra level #Added a check for cells with zero entries 01/10/24 #### The scoring of dichotomous data #the function to do 2 parameter dichotomous IRT\ #these should be put into the score.irt.2 function for speed #taken out for debugging purposes irt.2par.norm <- function(x,delta,beta,scores) { fit <- -1*(log(scores*(1-pnorm(beta*(delta-x))) + (1-scores)*(1-pnorm(beta*(x-delta))))) mean(fit,na.rm=TRUE) } #This does the logistic fit irt.2par <- function(x,delta,beta,scores) { fit <- -1*(log(scores/(1+exp(beta*(delta-x))) + (1-scores)/(1+exp(beta*(x-delta))) )) mean(fit,na.rm=TRUE) } #These next two functions were added to add limits to the fitting functions for the cases of all wrong and all right irtLimit.2par <- function(x,delta,beta,scores) { minItem <- which.min(delta*beta) maxItem <- which.max(delta*beta) fit <- -scores*log(1/(1+exp(beta*(delta-x)))) - (1-scores)*log(1/(1+exp(beta*(x-delta)))) - log(1/(1+exp(beta[minItem] *(delta[minItem]-x-1)))) - log(1/(1+exp(beta[maxItem] *(x-delta[maxItem]-1 ))) ) mean(fit,na.rm=TRUE) } irtLimit.2par.norm <- function(x,delta,beta,scores) { minItem <- which.min(delta*beta) maxItem <- which.max(delta*beta) fit <- -( scores*log(1-pnorm(beta*(delta-x))) +(1-scores)*log(1-pnorm(beta*(x-delta))) +log(1-pnorm(beta[minItem]*(delta[minItem]-x -1 ))) + log( 1-pnorm(beta[maxItem]*(x- delta[maxItem] -1 )) ) ) mean(fit,na.rm=TRUE) } "score.irt.2" <- function(stats,items,keys=NULL,cut=.3,bounds=c(-4,4),mod="logistic") { #find the person parameters in a 2 parameter model we use deltas and betas from irt.discrim and irt.person.rasch #find the person parameter #This does the normal fit #has several internal functions ##the next two are the parallelized functions #parallelize by subject seems most helpful?ms bySubject <- function(i,count.f,total.f,items.f,discrim.f,diffi.f) { #First we consider the case of all right or all wrong #but we also need to consider the person with no data! if (count.f[i] > 0) { beta=discrim.f[!is.na(items.f[i,])] delta=diffi.f[!is.na(items.f[i,])] if((sum(items.f[i,],na.rm=TRUE) ==0 ) || (prod(items.f[i,],na.rm=TRUE) == 1 )) { if(sum(items.f[i,],na.rm=TRUE) ==0 ) { #the case of all wrong # we model this as #although probably we don't need to do this anymore if(mod =="logistic") { myfit <- optimize(irtLimit.2par,bounds,beta=beta,delta=delta,scores = rep(0,sum(!is.na(items.f[i,] )))) } else { myfit <- optimize(irtLimit.2par.norm,bounds,beta=beta,delta=delta, scores = rep(0,sum(!is.na(items.f[i,])))) } theta <- myfit$minimum fit <- myfit$objective } else { if(prod(items.f[i,],na.rm=TRUE) == 1 ) { if (mod=="logistic") { myfit <- optimize(irtLimit.2par,bounds,beta=beta,delta=delta,scores = rep(1,sum(!is.na(items.f[i,])))) #do the logistic fit } else { myfit <- optimize(irtLimit.2par.norm,bounds,beta=beta,delta=delta,scores = rep(1,sum(!is.na(items.f[i,])))) } #do a normal fit function theta <- myfit$minimum fit <- myfit$objective } }} else { scores=t(items.f[i,!is.na(items.f[i,])]) #make this numeric in the case of weird (highly missing) data if(mod=="logistic") { myfit <- optimize(irtLimit.2par,bounds,beta=beta,delta=delta,scores=scores) #do the logistic fit } else { myfit <- optimize(irtLimit.2par.norm,bounds,beta=beta,delta=delta,scores=scores)} #do a normal fit function theta <- myfit$minimum fit <- myfit$objective #fit of optimizing program }} else {#cat("\nno items for subject",i) total.f[i] <- NA theta <- NA fit <- NA } #end if count ... else return(list(theta,total.f[i],fit) ) } #end bySubject #parallelize by factor #this is the the one to use when parallelized bigFunction <- function(f,n.obs,stats,items,keys=NULL,cut=.3,bounds=c(-5,5),mod="logistic") { nf <- length(stats$difficulty) diff <- stats$difficulty[[f]] cat <- dim(diff)[2] if(nf < 2) {#discrim <- drop(stats$discrimination) discrim <- stats$discrimination # although I need to check this with keys if(!is.null(keys)) {discrim <- discrim * abs(keys)} } else {discrim <- stats$discrimination[,f] if(!is.null(keys)) {discrim <- discrim * abs(keys[,f]) }} ### fit <- rep(NA,n.obs) theta <- rep(NA,n.obs) if(is.null(keys)) {#drop the items with discrim < cut items.f <- items[,(abs(discrim[,f]) > cut) ,drop=FALSE] #get rid of the those items that are not being processed for this factor diffi.f <- diff[(abs(discrim[,f]) > cut)] #and the redundant diffi discrim.f <- discrim[(abs(discrim[,f]) > cut),drop=FALSE ] #and get rid of the unnecessary discrim values } else { #the case of scoring with a keys vector items.f <- items[,(abs(keys[,f]) > 0) ,drop=FALSE] #get rid of the those items that are not being processed for this factor discrim.f <- discrim[(abs(keys[,f]) > 0),drop=FALSE ] #and get rid of the unnecessary discrim values diffi.f <- diff[(abs(keys[,f]) > 0)] #and the redundant diffi } diffi.vect <- as.vector(t(diffi.f)) #discrim.F.vect <- rep(discrim.f,each=cat) #discrim.f <- discrim[(abs(discrim > cut)),drop=FALSE] discrim.F.vect <- as.vector(t(discrim.f)) if(is.matrix(discrim)) discrim.F.vect <- drop(discrim.F.vect) total <- rowMeans(t(t(items.f)*sign(discrim.F.vect)),na.rm=TRUE) count <- rowSums(!is.na(items.f)) #We can speed this up somewhat if we don't try to fit items with 0 discrim (i.e., items that do not load on the factor or are not keyed) #do it for all subject for this factor #now, lets parallelize this for each subject as well #this is probably a bad idea, for it leads to an amazing amount of overhead in terms of memory and processes #mapply for debugging, mcmapply for parallel #lets just try mapply to see if it gets around the problem #actually, making this mcmapply and the call to bigFunction mapply seems to be the solution #especially when we are doing scoreIrt.1pl or scoreIrt.2pl which is already doing the parallelsim there subjecttheta <- mcmapply(bySubject,c(1:n.obs),MoreArgs = list(count,total,items.f,discrim.f,diffi.f)) #returns a list of theta and fit subjecttheta <- matrix(unlist(subjecttheta),ncol=3,byrow=TRUE) theta <- subjecttheta[,1] total <- subjecttheta[,2] fit <- subjecttheta[,3] theta [theta < bounds[1]] <- bounds[1] theta[theta > bounds[2]] <- bounds[2] # if((!is.null(keys)) & (all(keys[,f] == -1) || (sign(cor(discrim,keys[,f],use="pairwise")) < 0) )) {theta <- -theta #if((!is.null(keys)) & (all(keys[,f] == -1) )) {theta <- -theta # total <- -total} nf <- length(stats$difficulty) n.obs <- dim(items)[1] nvar <- dim(items)[2] # scores <- matrix(NaN,nrow=n.obs,ncol=nf*3) #scores <- list(nf*3) scores <- list(theta,total,fit) return(scores) } # end of bigFunction #now do the scoring one factor at a time but allowing multiple cores #we now start score.irt.2 proper #this finds scores using multiple cores if they are available nf <- length(stats$difficulty) n.obs <- dim(items)[1] min.item <- min(items,na.rm=TRUE) #uses local minima --probably problematic for small number of items items <- items - min.item #this converts scores to positive values from 0 up (needed to do the fitting function) #this parallels by factor which in turn is parallelized by subject in bySubject #use mapply for debugging, mcmapply for parallel processing #since we are already parallelizing by scale when we call scoreIrt.1pl or .2pl, this is not necessary to parallelize scores <- mcmapply(bigFunction,c(1:nf),MoreArgs=list(n.obs=n.obs,items=items,stats=stats,keys=keys, cut=cut, bounds=bounds, mod=mod)) nf <- length(stats$difficulty) scores <- matrix(unlist(scores),ncol=nf*3) scores <- scores[,c(seq(1,nf*3,3),seq(2,nf*3+1,3),seq(3,nf*3 +2,3))] colnames(scores) <- paste(rep(c("theta","total","fit"),each=nf),1:nf,sep="") return(scores) }#end of score.irt.2 ################### ##################### "score.irt.poly" <- function(stats,items,keys=NULL,cut=.3,bounds=c(-4,4),mod="logistic") { #find the person parameters in a 2 parameter model we use deltas and betas from irt.discrim and irt.person.rasch #find the person parameter #created July 4, 2011 #revised Dec 31, 2016 to match irt.2 # this optimizes the logistic function, # irt.2par.poly <- function(x,delta,beta,scores) { # fit <- -(scores*log(1/(1+exp(beta*(delta-x)))) + (1-scores)*log(1/(1+exp(beta*(x-delta))))) # mean(fit,na.rm=TRUE) # } # # irt.2par.poly.norm <- function(x,delta,beta,scores) { # fit <- -1*(log(scores*(1-pnorm(beta*(delta-x))) + (1-scores)*(1-pnorm(beta*(x-delta))))) # mean(fit,na.rm=TRUE) # } ####The function that is parallelized big.poly <- function(f,n.obs,stats,items,keys=NULL,cut=.3,bounds=c(-5,5),mod="logistic") { nf <- ncol(stats$discrimination) #for (f in 1:nf) { #do it for every factor/scale diff <- stats$difficulty[[f]] if(nf < 2) {discrim <- stats$discrimination if(!is.null(keys)) {discrim <- discrim * abs(keys) } } else {discrim <- stats$discrimination[,f] if(!is.null(keys)) {discrim <- discrim * abs(keys[,f]) } } cat <- dim(diff)[2] total <- rep(NA,n.obs) fit <- rep(NA,n.obs) theta <- rep(NA,n.obs) item.f <- t(items) item.f[abs(discrim) < cut] <- NA #this does not change the item, just the temp version of the item item.f <- t(item.f) ### if(!is.null(keys)) {item.f <- item.f[,(abs(keys[,f] )> 0) ,drop=FALSE] #get rid of the those items that are not being processed for this factor discrim.f <- discrim[(abs(keys[,f]) > 0),drop=FALSE ] #and get rid of the unnecessary discrim values diffi.f <- diff[(abs(keys[,f]) > 0),byrows=TRUE] #and the redundant diffi diffi.vect <- as.vector(t(diff[(abs(keys[,f]) > 0),byrows=TRUE])) discrim.F.vect <- rep(discrim.f,each=cat) } else { discrim.f <- discrim diffi.f <- diff diffi.vect <- as.vector(t(diff) ) discrim.F.vect <- rep(discrim.f,each=cat) } ## notice that this is vectorized and does it for all subjects #seem to have solved the problem of missing items which are reversed. total <- rowMeans(t(t(item.f )* as.vector(sign(discrim.f))),na.rm=TRUE) #fixed 11/11/11 to be as.vector # total.positive <- rowMeans(t(t(item.f)* as.vector(sign(discrim.f) > 0)),na.rm=TRUE) # total.negative <- rowMeans(t(t(item.f)* as.vector(sign(discrim.f) < 0)),na.rm=TRUE) ## num.keyed <- rowSums(!is.na(item.f)) num.reversed <- rowSums(!is.na(item.f[,discrim.f < 0,drop=FALSE])) total <- total + num.reversed * (max.item- min.item+1)/num.keyed + min.item total[is.nan(total)] <- NA count <- rowSums(!is.na(item.f)) #but now, we need to do the next step one at a time (I think) for (subj in 1:n.obs) { if (count[subj]> 0) { #just do cases where we actually have data newscore <- NULL score <- item.f[subj,] #just the items to be scored for (i in 1:ncol(item.f)) { #Treat the items as a series of 1 or 0 responses - but note that cat = max - min if(is.na(score[i])) {newscore <- c(newscore,rep(NA,cat)) } else { if(very.close(score[i],( cat))) {newscore <- c(newscore,rep(1,cat)) } else { newscore <- c(newscore,rep(1,score[i]),rep(0,cat-score[i])) } }} beta=discrim.F.vect[!is.na(score)] #does this handle missing values -- need to fix? delta=diffi.vect[!is.na(score)] if((very.close(total[subj],min.item)) | (very.close(total [subj],(max.item+min.item)) )){ # first check for all lowest responses or all highest responses if(very.close(total[subj],min.item)) { # The case of all wrong #we need to make sure that this value is less than any value for non=minimal responses #do the same thing that we do for the score.irt.2 if(mod =="logistic") { myfit <- optimize(irtLimit.2par,bounds,beta=discrim.F.vect,delta=diffi.vect,scores=newscore)} else {myfit <- suppressWarnings(optimize(irtLimit.2par.norm,bounds,beta=discrim.F.vect,delta=diffi.vect,scores=newscore)) } theta[subj] <- myfit$minimum fit[subj] <- myfit$objective } else { if(very.close(total [subj],(max.item+min.item))) {#all right if(mod=="logistic") { myfit <- optimize(irtLimit.2par,bounds,beta=discrim.F.vect,delta=diffi.vect,scores=newscore) } else { myfit <- suppressWarnings(optimize(irtLimit.2par.norm,bounds,beta=discrim.F.vect,delta=diffi.vect,scores=newscore)) } theta[subj] <- myfit$minimum fit[subj] <- myfit$objective }} } else { #just process those items where we have some responses that are neither max nor min if(mod=="logistic") { myfit <- optimize(irtLimit.2par,bounds,beta=discrim.F.vect,delta=diffi.vect,scores=newscore) } else {myfit <- suppressWarnings(optimize(irtLimit.2par.norm,bounds,beta=discrim.F.vect,delta=diffi.vect,scores=newscore)) } theta[subj] <- myfit$minimum fit[subj] <- myfit$objective #fit of optimizing program } } else { fit[subj] <- NA theta[subj] <- NA } #end if else } if((!is.null(keys)) & (all(keys[,f] == -1) )) {theta <- -theta total <- -total} theta[theta < bounds[1]] <- bounds[1] theta[theta > bounds[2]] <- bounds[2] scores <- list(theta,total, fit) return(scores) } #end of big function ##the start of the irt.poly.function after setting up the various subfunctions min.item <- min(items,na.rm=TRUE) #uses local minima --probably problematic for small number of items items <- items - min.item #this converts scores to positive values from 0 up max.item <- max(items,na.rm=TRUE) #we use this when reverse score -- but note that this is not the original max value. We will adjust this in total nf <- length(stats$difficulty) n.obs <- dim(items)[1] nvar <- dim(items)[2] #mcmapply for parallel, mapply for debugging #scores <- mapply(big.poly,1:nf,MoreArgs=list(n.obs=n.obs,stats=stats,items=items,keys=keys,cut=cut,bounds=bounds,mod=mod)) scores <- mcmapply(big.poly,1:nf,MoreArgs=list(n.obs=n.obs,stats=stats,items=items,keys=keys,cut=cut,bounds=bounds,mod=mod)) scores <- matrix(unlist(scores),ncol=nf*3) scores <- scores[,c(seq(1,nf*3,3),seq(2,nf*3+1,3),seq(3,nf*3 +2,3))] colnames(scores) <- paste(rep(c("theta","total","fit"),each=nf),1:nf,sep="") return(scores) } #end of score.irt.poly ################################################# # # The main function # # which in turn calls either the dichotomous scoring (score.irt.2) # or the polytomous version (scoreIrt.poly #operates either as score.irt (deprecated) or scoreIrt (preferred) ############################################################ "score.irt" <- function(stats=NULL,items,keys=NULL,cut=.3,bounds=c(-4,4),mod="logistic") { message("score.irt is deprecated and has been replaced by scoreIrt, please change your call") scoreIrt(stats=stats,items=items,keys=keys,cut=cut,bounds=bounds,mod=mod) } "scoreIrt" <- function(stats=NULL,items,keys=NULL,cut=.3,bounds=c(-4,4),mod="logistic") { #depending upon what has already been done (in the stats object), we fire off different scoring functions #added the tau option in switch in case we have already done irt.tau 6/29/16 #we need to adjust the discrimination order from irt.fa to match the order of the items if(!is.null(keys) && is.list(keys)){ select <- sub("-","",unlist(keys)) items <- items[select] keys <- make.keys(items,keys)} if(!is.null(keys) && (is.vector(keys))) keys <- matrix(keys) if (length(class(stats)) > 1) { if(!is.null(keys) && is.vector(keys)) keys <- as.matrix(keys) none <- irt.poly <- NA #just to get around a compiler warning obnames <- cs(irt.poly, irt.fa, fa, tau, none) value <- inherits(stats, obnames, which=TRUE) if (any(value > 1)) {value <- obnames[which(value >0)]} else {value <- "none"} switch(value, irt.poly = {scores <- score.irt.poly(stats$irt,items,keys,cut,bounds=bounds,mod=mod) }, irt.fa = {scores <- score.irt.2(stats$irt,items,keys,cut,bounds=bounds,mod=mod)}, fa = {tau <- irt.tau(items) #this is the case of a factor analysis to be applied to irt nf <- dim(stats$loadings)[2] diffi <- list() for (i in 1:nf) {diffi[[i]] <- tau/sqrt(1-stats$loadings[,i]^2) } discrim <- stats$loadings/sqrt(1-stats$loadings^2) class(diffi) <- NULL class(discrim) <- NULL new.stats <- list(difficulty=diffi,discrimination=discrim) scores <- score.irt.poly(new.stats,items,keys,cut,bounds=bounds)}, tau = {tau <- stats #get the tau stats from a prior run if(is.matrix(keys)) {nf <- dim(keys)[2]} else {nf <-1} diffi <- list() for (i in 1:nf) {diffi[[i]] <- tau } discrim <- keys class(diffi) <- NULL class(discrim) <- NULL new.stats <- list(difficulty=diffi,discrimination=discrim) if(dim(tau)[2] ==1) {scores <- score.irt.2(stats=new.stats,items=items,keys=keys,cut=cut,bounds=bounds)} else { scores <- score.irt.poly(stats=new.stats,items=items,keys=keys,cut=cut,bounds=bounds)} }, none = {#this is the case of giving it a discrimination vector and a difficulty matrix #this allows us to score meeting scoring keys from elsewhere (e.g. from PROMIS) #first make sure that there is something there # stats <- as.matrix(stats) if(!is.null(stats)) { discrim <- stats[,1,drop=FALSE] nlevels <-NCOL(stats) diffi <- stats[,2:nlevels] diffi <- list(diffi) new.stats <- list(difficulty=diffi,discrimination=discrim) scores <- score.irt.poly(stats=new.stats,items=items,keys=NULL,cut=cut,bounds=bounds)} else {stop("I am confused. You have an unclassed stats object.")} } )#end of switch #we should have a null case } else { #the stats input if it exists does not have a class #input is probably a keys matrix but not in the case of a single item being scored #the normal case where we find item dificulities and weight items equally if(!is.null(stats)) {#an external set of diffs and discrims is provide -- e.g. from PROMIS #diffi <- stats$difficulty #discrim <- as.matrix( stats$discrim,ncol=1) # stats$discrimination <- discrim new.stats <- list() new.stats$discrimination <- as.matrix(stats[1]) nlevels <- NCOL(stats) diffi <- stats[2:nlevels] colnames(diffi) <- c(1:(nlevels-1)) new.stats$difficulty <- list(diffi) scores <- score.irt.poly(stats=new.stats,items=items,keys=keys,cut=cut,bounds=bounds) } else {#the normal case tau <- irt.tau(items) #this is the case of a using a scoring matrix to be applied to irt if(is.matrix(keys)) {nf <- dim(keys)[2]} else {nf <-1} diffi <- list() for (i in 1:nf) {diffi[[i]] <- tau } if(!is.null(keys)) {discrim <- keys} else { # stop("I am sorry, you specified tau but not keys.") #or you are scoring a single item #diffi[[1]] <- stats$irt$difficulty discrim <- matrix(1,nrow=NCOL(items),ncol = 1) #as strange as this seems, this needs to be a 1 x 1 matrix } class(diffi) <- NULL class(discrim) <- NULL new.stats <- list(difficulty=diffi,discrimination=discrim) if(dim(tau)[2] ==1) {scores <- score.irt.2(stats=new.stats,items=items,keys=keys,cut=cut,bounds=bounds)} else { scores <- score.irt.poly(stats=new.stats,items=items,keys=keys,cut=cut,bounds=bounds)} } } scores <- data.frame(scores) if(!is.null(keys)) {colnames(scores) <-c( paste(colnames(keys),"theta",sep="-"),paste(colnames(keys),"total",sep="-"),paste(colnames(keys),"fit",sep="-"))} return(scores) } ############ END of scoreIrt ################## #### #Various helper functions very.close <- function(x,y,tolerance = .Machine$double.eps) { abs(x-y) < tolerance} ##### #find tau from dichotomous or polytomous data without bothering to find the correlations #useful for score.irt #modified July 14, 2016 to speed up significantly by dropping the xt <- table(x) line "irt.tau" <- function(x) { x <-as.matrix(x) nvar <- dim(x)[2] xmin <- min(x,na.rm=TRUE) xmax <- max(x,na.rm=TRUE) nvalues <- xmax-xmin +1 if(nvalues > 10) stop("You have more than 10 categories for your items, polychoric is probably not needed") #xt <- table(x) #this can take a long time for sapa data #nvalues <- length(xt) #find the number of response alternatives if(nvalues ==2) {tau <- -qnorm(colMeans(x,na.rm=TRUE)) tau <- as.matrix(tau) rownames(tau) <- colnames(x)} else { if(nvalues > 10) stop("You have more than 10 categories for your items, polychoric is probably not needed") #xmin <- min(x,na.rm=TRUE) xfreq <- apply(x- xmin+ 1,2,tabulate,nbins=nvalues) n.obs <- colSums(xfreq) xfreq <- t(t(xfreq)/n.obs) tau <- qnorm(apply(xfreq,2,cumsum))[1:(nvalues-1),] #these are the normal values of the cuts if(!is.matrix(tau)) tau <- matrix(tau,ncol=nvar) rownames(tau) <- paste0(xmin:(xmax-1)) colnames(tau) <- colnames(x) if(dim(tau)[1] < dim(tau)[2]) tau <- t(tau) #rows are variables, columns are subjects } class(tau) <- c("psych","tau") #added the tau class so score.irt can use the tau values return(tau) } #added August 6, 2012 "irt.responses" <- function(theta,items, breaks = 11,show.missing=FALSE,show.legend=TRUE,legend.location="topleft",colors=NULL,...) { #if(is.null(colors)) colors =c("gray0", "blue3", "red3", "darkgreen", "gold2", "gray50", "cornflowerblue", "mediumorchid2") if(is.null(colors)) colors =c("black", "blue", "red", "darkgreen", "gold2", "gray50", "cornflowerblue", "mediumorchid2") #legend.location <- c("bottomright", "bottom", "bottomleft", "left", "topleft", "top", "topright", "right", "center","none") #uniqueitems <- unique(as.vector(unlist(items))) item.counts <- names(table(as.vector(unlist(items)))) uniqueitems <- as.numeric(item.counts) nalt <- length(uniqueitems) + 1 #include the missing value from response.frequencies nvar <- ncol(items) theta.min <- min(theta,na.rm=TRUE) theta.max <- max(theta,na.rm=TRUE) binrange <- cut(theta, breaks = breaks) binnums <- as.numeric(binrange) items <- as.matrix(items) stats <- by(items,binnums,function(x) response.frequencies(x,uniqueitems=uniqueitems)) stats.m <- unlist(stats) stats.m <- matrix(stats.m,ncol=nvar*nalt,byrow=TRUE) theta <- seq(theta.min,theta.max,length.out=breaks) for (i in 1:nvar) { plot(theta,stats.m[,i],ylim=c(0,1),typ="l",xlab="theta",ylab="P(response)",main=paste(colnames(items)[i]),col=colors[1],...) for(j in 1:(nalt-2+show.missing)) { points(theta,stats.m[,i+nvar*j],typ="l",lty=(j+1),col=colors[j+1 ],...) } if(show.legend) { legend(legend.location, paste(item.counts[1:(nalt-1 + show.missing)]), text.col = colors[1:(nalt-1+show.missing)], lty = 1:(nalt-1+show.missing), ncol=4,bty="n")} }} #developed based on suggestions and code by David Condon #scores multiple scales with full 2pl parameters #gets around the problem of tau differences for 0/1 and 1/6 scales. #Requires finding the correlation matrix for each scale, rather than taking advantage of a prior correlation matrix #modifed Jan 3, 2017 to reverse key scales where the keys and the factor solution are backwards (e.g., stability vs. neuroticism) #modified August, 2017 to handle the case of single item scales scoreIrt.2pl <- function(itemLists,items,correct=.5,messages=FALSE,cut=.3,bounds=c(-4,4),mod="logistic") { nvar <- length(itemLists) #check to make sure we didn't screw up the itemsLists and the items if(NCOL(itemLists) > 1) {stop("You seem to have misspecified the ItemLists. I am stopping")} if(NCOL(items)==1) {stop("You seem to have misspecified the items. I am stopping")} select <- sub("-","",unlist(itemLists)) #select just the items that will be scored select <- select[!duplicated(select)] items <- items[,select] #this should reduce memory load #make sure all correlations will exist cp <- pairwiseDescribe(items) if(cp$min< 3) {stop("Some cells do not have enough data to continue. Please examine the pairwise structure of your data.")} #we turn off the sorting option in irt.fa so that the item discriminations match the scoring order #small function is called using parallel processing smallFunction <- function(i,selection,correct,cut=cut,bounds=bounds,mod=mod) { direction <- rep(1,length(selection[[i]])) neg <- grep("-", selection[[i]]) direction[neg] <- -1 select <- sub("-","",selection[[i]]) selectedItems <- as.matrix(items[,select,drop=FALSE]) if(NCOL(selectedItems) > 1 ) { #The normal case is to have at least 2 items in a scale if(!messages) {suppressMessages(stats <- irt.fa(selectedItems,correct=correct,plot=FALSE,sort=FALSE))} else { stats <- irt.fa(selectedItems,correct=correct,plot=FALSE,sort=FALSE)} flip <- sum(sign(stats$irt$discrimination * direction)) if(flip < 0 ) stats$irt$discrimination <- -stats$irt$discrimination scores <- scoreIrt(stats,selectedItems,cut=cut,bounds=bounds,mod=mod) } else {stats <- list() #the case of a single item to score stats$irt$difficulty <- irt.tau(selectedItems) stats$irt$discrimination <- 1 scores <- scoreIrt(stats,selectedItems,keys=NULL,cut=cut,bounds=bounds,mod=mod) } scores <- scores$theta return(scores)} #use mapply for debugging, mcmapply for parallel processing #items is global and not passed to save memory scoresList <- mcmapply(smallFunction,c(1:nvar),MoreArgs=list(selection=itemLists,correct=correct,cut=cut,bounds=bounds,mod=mod)) colnames(scoresList) <- names(itemLists) return(scoresList) } #A perhaps more robust way of calling scoreIrt is to find tau just for a few items at a time and then scoring one scale at a time. #the alternative is use scoreIrt for all of them at once with a keys function. scoreIrt.1pl <- function(keys.list,items,correct=.5,messages=FALSE,cut=.3,bounds=c(-4,4),mod="logistic") { if(NCOL(keys.list)> 1) {stop("You seem to have misspecified the keys.list. I am stopping")} if(NCOL(items)==1) {stop("You seem to have misspecified the items. I am stopping")} select <- sub("-","",unlist(keys.list)) select <- select[!duplicated(select)] items <- items[,select] nf <- length(keys.list) fix <- is.numeric(keys.list[[1]]) #what does this do? smallFunction <- function(i,keys.list,correct,cut=cut,bounds=bounds,mod=mod) { list.i <- keys.list[[i]] keys <- rep(1,length(list.i)) neg <- grep("-", list.i) keys[neg] <- -1 select <- sub("-", "", list.i) # select <- colnames(items)[select] selectedItems <- as.matrix(items[,select]) #if items is a matrix, we need to specify all rows stats <- irt.tau(selectedItems) scores <- scoreIrt(stats,selectedItems,keys,cut=cut,bounds=bounds,mod=mod) # stats <- irt.tau(items[select]) # scores <- scoreIrt(stats,items[select],keys,cut=cut,bounds=bounds,mod=mod) scores <- scores[,1] } #use mapply for debugging, mcmapply for parallel processing #items are global and not passed scoresList <- mcmapply(smallFunction,c(1:nf),MoreArgs=list(keys.list=keys.list,correct=correct,cut=cut,bounds=bounds,mod=mod)) colnames(scoresList) <- names(keys.list) return(scoresList) } ################################# #The following are useful demonstration functions for examining how fitting works # # Might make public if we document them # ############################################# #show how the fitting function works for the case without limits on the fits #demonstrates the problem of all wrong or all right #Also shows the difference between normal and logistic fits ############### testIrt <- function(score,delta,beta,mod="logistic",limits=TRUE,lower=-4) {x <- seq(lower,-lower,.1) y <- x if(limits) { for(j in 1:nrow(score)) {scores <- score[j,] for (i in 1:length(x)) {if(mod=="logistic") {y[i] <- irtLimit.2par(x[i],delta,beta,scores) } else {y[i] <- irtLimit.2par.norm(x[i],delta,beta,scores)} } plot(y ~ x) for(k in 1:length(scores)) { text( -1 + .5*k,(max(y) + min(y) )*1/3,(scores[k]))} text(0,(max(y) + min(y))/2,round(x[which(y == min(y))],2)) }} else { for(j in 1:nrow(score)) {scores <- score[j,] for (i in 1:length(x)) {if(mod=="logistic") {y[i] <- irt.2par(x[i],delta,beta,scores) } else {y[i] <- irt.2par.norm(x[i],delta,beta,scores)} } plot(y ~ x) for(k in 1:length(scores)) { text( -1 + .5*k,(max(y) + min(y) )*2/3,(scores[k]))} text(0,(max(y) + min(y))/2,round(x[which(y == min(y))],2)) } } } #an alternative, and much simpler model (but that does not handle missing data) simpleScore <- function(scores,delta,beta,mod="logistic") { if (mod=="logistic") {estimate <- (-(scores %*%log(1/(1 + exp(-beta*delta))) - (1-scores)%*%log(1-1/(1+exp(-delta*beta))))) plog <- rowMeans(estimate) } else { estimate <- -1*(((scores)%*%log(beta*(1-pnorm((delta)))) - (1-(scores))%*%log(beta *pnorm(delta)))) plog <- (rowMeans(estimate))} return(plog) } #removed links to ltm since ltm does not work for polytomous data test.irt <- function(nvar = 9, n.obs=1000,mod="logistic",type="tetra", low=-3, high=3,seed=NULL) { if(!is.null(seed)) set.seed(seed) if(type =="tetra" ) { x.sim <- sim.irt(nvar=nvar,n=n.obs,low=low,high=high,mod=mod)} else { x.sim <- sim.poly(nvar=nvar,n=n.obs,low=low,high=high,mod=mod)} x.irt <- irt.fa(x.sim$items[,1:nvar],sort=FALSE,plot=FALSE) #if(!requireNamespace("ltm")) {stop("The ltm package is required when running test.irt")} # x.ltm <- ltm::ltm(x.sim$items~z1) # x.ltm.sc <- ltm::factor.scores(x.ltm) # ltm.responses <- table2df(x.ltm.sc$score.dat,x.ltm.sc$score.dat[,nvar+1]) # ltm.responses <- data.frame(ltm.responses[,c(1:nvar,nvar+3)]) # colnames(ltm.responses) <- c(colnames(x.sim$items),"ltm") # ltm.responses <- psychTools::dfOrder(ltm.responses,c(1:nvar)) xnvart <- data.frame(x.sim$items,theta = x.sim$theta) xnvart <- psychTools::dfOrder(xnvart,c(1:nvar)) x.fsall <- psych::factor.scores(xnvart[1:nvar],x.irt$fa,method="regression")$scores x.df <- data.frame(xnvart, fs=x.fsall) #cor2(x.df,ltm.responses) xdelta <- x.irt$irt$difficulty[[1]] xbeta <- x.irt$irt$discrimination x.scores <- data.matrix(x.df[1:nvar]) irt.sc <- scoreIrt(x.irt,x.scores) irt.scn <- scoreIrt(x.irt,x.scores,mod="normal") ras <- 1 x.tot <- rowSums(x.scores[,1:nvar]) if(type=="tetra") { pl2<- simpleScore(x.scores,xdelta,xbeta) pl1<- simpleScore(x.scores,xdelta,rep(ras,nvar)) pn2 <- simpleScore(x.scores,xdelta,xbeta,mod="normal") pn1<- simpleScore(x.scores,xdelta,rep(ras,nvar),mod="normal") x.df.sc <- data.frame(logist2pl=pl2,pl1,pn2, pn1 ,x.tot, fs =x.df$MR1,irt.sc[,1],irt.scn[,1],theta=x.df$theta) colnames(x.df.sc) <- c("PL2", "PL1", "PN2", "PN1","total", "factor","irt","irt-N","theta") } else {x.df.sc <- data.frame(x.tot, fs =x.df$MR1,irt.sc[,1],irt.scn[,1],theta=x.df$theta) colnames(x.df.sc) <- c("total", "factor","irt","irt-N","theta")} pairs.panels(x.df.sc) invisible(x.df.sc) } psych/R/fix.dplyr.R0000644000176200001440000000033213060550346013643 0ustar liggesusers"fix.dplyr" <- function (object) { if(is.data.frame(object)) { cn <- class(object) df <- which(cn=="data.frame") cn.not <- cn[-df] cn <- c("data.frame",cn.not) class(object) <- cn } invisible(object) }psych/R/simGene.R0000644000176200001440000000201413111321042013274 0ustar liggesusers"simGene" <- function(ng=10,traits=1,n.obs=1000,dom=TRUE) { X <- array(sample(2,ng*n.obs*traits*3,replace=TRUE),dim=c(n.obs,ng,traits,3)) MZ <- DZ <- array(NA,dim=c(n.obs,ng,traits)) MZt <- DZt <- matrix(NA,n.obs,traits) for(t in 1:traits) { if(dom) { MZ[,1:ng,t] <- X[,1:ng,t,1] * X[,1:ng,t,2] #the allele values are mulitplied DZ[,1:ng,t] <- X[,1:ng,t,1] * X[,1:ng,t,3]} else { MZ[,1:ng,t] <- X[,1:ng,t,1] + X[,1:ng,t,2] #the allele values are added DZ[,1:ng,t] <- X[,1:ng,t,1] + X[,1:ng,t,3]} MZt[,t] <- rowMeans(MZ[,,t]) #the trait values DZt[,t] <- rowMeans(DZ[,,t]) } X.df <- data.frame(genes=X[,1:ng,1:traits,sample(2,1,replace=TRUE)],MZ=MZt,DZ=DZt) return(X.df)} test.simGene <- function(x, ng=10) { t1 <-rowMeans(x[1:ng]) t2 <- rowMeans(x[(ng+1):(ng*2)]) t11 <-rowMeans(x[1:(ng/2)]) t12 <-rowMeans(x[(ng/2 +1):ng]) t21 <-rowMeans(x[(ng+1):(ng/2 + ng)]) t22 <- rowMeans(x[(ng/2 + ng+1):(ng*2)]) scores <- data.frame(t1=t1,t2=t2,t11=t11,t12=t12,t21 = t21,t22=t22,traits=x[(ng*2 +1):(ng*2+4)]) }psych/R/scoreWtd.R0000644000176200001440000000302113543703043013514 0ustar liggesusers"scoreWtd" <- function(weights,items,std=TRUE,sums=FALSE,impute="none"){ vars <-rownames(weights) n.scales <- NCOL(weights) vnames <- colnames(weights) if(any(c("(Intercept)","Intercept") %in% vars)) {items <- data.frame(Intercept=1,items) colnames(items)[1] <- "(Intercept)" vars[1] <- "(Intercept)"} selected <-items[vars] #just use those items that have weights switch (impute, mean ={ miss <- which(is.na(items),arr.ind=TRUE) item.means <- colMeans(items,na.rm=TRUE) #replace missing values with means items[miss]<- item.means[miss[,2]]}, median={ miss <- which(is.na(items),arr.ind=TRUE) item.med <- apply(items,2,median,na.rm=TRUE) #replace missing with medians items[miss]<- item.med[miss[,2]]} ) if(std) {z.scores <-scale(selected)} else z.scores <- selected # wtd.scores <-z.scores %*% (weights) #this is the most basic version, but doesn't handle any missing wtd.scores <- matrix(rep(NA,n.scales * NROW(items)),ncol=n.scales) #this is just a dummy array if(sums) { weights <- t(weights) for(j in 1:n.scales) {wtd.scores[,j] <- colSums(weights[j,] *t(z.scores),na.rm=TRUE)} } else { if(n.scales ==1) { wtd.scores[,1] <- colMeans(weights[,1] *t(z.scores),na.rm=TRUE)} else { weights <- t(weights) for(j in 1:n.scales) { wtd.scores[,j] <- colMeans(weights[j,] *t(z.scores),na.rm=TRUE) }} } colnames(wtd.scores) <- vnames return(wtd.scores) } #developed September, 2019 to get more precise weights (still not beta weights) to take advantage of large sample stability psych/R/reverse.code.R0000644000176200001440000000610714432500240014306 0ustar liggesusers"reverse.code" <- function(keys,items,mini=NULL,maxi=NULL) { if(is.vector(items)) {nvar <- 1} else {nvar <- dim(items)[2]} items <- as.matrix(items) if(is.null(maxi)) {colMax <- apply(items,2,max,na.rm=TRUE)} else {colMax <- maxi} if(is.null(mini)) {colMin <- apply(items,2,min,na.rm=TRUE) } else {colMin <-mini} colAdj <- colMax+colMin if(length(keys) < nvar) { temp <- keys if(is.character(temp)) temp <- match(temp,colnames(items)) keys <- rep(1,nvar) keys[temp] <- -1 } if(is.list(keys) | is.character(keys)){ keys <- make.keys(items,keys) keys <- diag(keys)} keys.d <- diag(keys,nvar,nvar) items[is.na(items)] <- -9999 #a way of using matrix operations even for missing data reversed <- items %*% keys.d adj <- abs(keys*colAdj) #now we need to add the adjustments to the ones we flipped adj[keys > 0] <- 0 #added December 26 new <- t(adj + t(reversed)) new[abs(new) > 999] <- NA colnames(new) <- colnames(items) colnames(new)[keys < 0] <- paste(colnames(new)[keys < 0],"-",sep="") return(new) } #corrected April 3, 2010 to properly do matrix addition #modified Sept 14, 2013 to allow symbolic names and to allow for just specifying the ones to reversed #suggested by David Stanley #fixed bug reported by Jian Jin (26/12/13) "rescale" <- function(x,mean=100,sd=15,df=TRUE) {if(df) {x <- data.frame(t(t(scale(x))*sd+mean)) } else {x <- t( t(scale(x))*sd +mean)} return(x) } "scrub" <- function (x, where, min, max, isvalue,newvalue,cuts=NULL) { if (missing(min)) min <- -Inf if (missing(max)) max <- Inf if (missing(isvalue)) isvalue <- Inf if (missing(where)) where <- 1:NCOL(x) maxlength <- max(length(isvalue),length(min),length(max),length(where)) if(missing(newvalue)) newvalue <- rep(NA,maxlength) if (length(min) == 1) min <- rep(min, ncol(x)) if (length(max) == 1) max <- rep(max, ncol(x)) if(length(where) == 1) where <- rep(where,maxlength) if(length(isvalue) ==1) isvalue <- rep(isvalue,maxlength) if(length(newvalue) ==1) newvalue <- rep(newvalue,maxlength) if(is.null(cuts)) {for(k in 1: maxlength) { i <- where[k] x[!is.finite(x[,i]),i] <- NA if(is.numeric(x[,i])) { x[(!is.na(x[, i]) & (x[, i] < min[k])), i] <- newvalue[k] x[(!is.na(x[, i]) & (x[, i] > max[k])), i] <- newvalue[k] } x[(!is.na(x[, i]) & (x[, i] == isvalue[k])), i] <- newvalue[k] }} else { n.cuts <- length(cuts) for (j in 1:n.cuts) { for(k in 1:length(where)) { x[ (!is.na(x[where[k]])) & (x[where[k]] >= cuts[j] ) & (x[where[k]] < cuts[j+1]),where[k]] <- j } } } return(x) } #added Sept 11, 2010 #modified December 6, 2010 to allow recoding #modified December 3, 2011 to be more general #modifed January 8, 2012 to be a bit more flexible #modified April 11, 2019 to handle character data #fixed August 9, 2019 to correctly process is.numeric #modified December 12th ,2020 to include the "bucketing" option #modified December 2nd, 2021 to make Nan and inf values NA psych/R/make.hierarchical.R0000644000176200001440000000255612436202746015274 0ustar liggesusers# A function to create a correlation matrix with a hierarchical structure "make.hierarchical" <- function (gload=NULL,fload=NULL,n=0,raw=FALSE) { # require(MASS) if(is.null(gload)) gload=matrix(c(.9,.8,.7),nrow=3) if(is.null(fload)) {fload <-matrix(c( .8,0,0, .7,0,.0, .6,0,.0, 0,.7,.0, 0,.6,.0, 0,.5,0, 0,0,.6, 0,0,.5, 0,0,.4), ncol=3,byrow=TRUE)} fcor <- gload %*% t(gload) #the factor correlation matrix diag(fcor) <-1 #put ones on the diagonal model <- fload %*% fcor %*% t(fload) #the model correlation matrix for oblique factors diag(model)<- 1 # put ones along the diagonal nvar <- dim(fload)[1] colnames(model) <- rownames(model) <- paste("V",1:nvar,sep="") if(n>0) { # mu <- rep(0,nvar) #model <- mvrnorm(n = n, mu, Sigma=model, tol = 1e-6, empirical = FALSE) #the next 3 lines replaces mvrnorm (adapted from mvrnorm, but without the checks) eX <- eigen(model) model <- matrix(rnorm(nvar * n),n) model <- t( eX$vectors %*% diag(sqrt(pmax(eX$values, 0)), nvar) %*% t(model)) if (!raw ) { model <- cor(model) } } make.hierarchical <- model } psych/R/plot.reliability.r0000644000176200001440000000775314101257376015274 0ustar liggesusers #Created June 11-17, 2021 #fixed 6/20/21 to avoid the problem of null cases "plot.reliability" <- function(x,omega=TRUE,alpha=TRUE,split=TRUE,uni=TRUE, add=FALSE,xlim=NULL,main=NULL,...) { if(!is.list(x) && split) {stop("To show split halfs, you must call reliability with raw=TRUE")} rel.object <- x if(!is.list(x)) {result.df<- x} else {result.df<- x$result.df} if(split) { x <- rel.object$splits #get rid of the empty elements lx <- lapply(x,length) global.min <- min(unlist(lapply(x[lx>0],min)),result.df[,1]) global.max <- max(unlist(lapply(x[lx >0],max)),result.df[,3]) u.max <- max(unlist(lapply(x[lx >0],max)),result.df[,4]) if(uni) global.max <- max(global.max,u.max) nvar <- length(x[lx> 0]) labels<- names(x[lx>0]) x <- x[lx>0] } else { global.min <- min(result.df[,1]) global.max <- max(result.df[,3]) nvar <- NROW(result.df) labels <- rownames(result.df)} if(!add) plot.new() ord <- nvar:1 labels[ord] <-labels linch <- max(strwidth(labels)) lheight <- par("csi") # linch <- if (!is.null(labels)) max(strwidth(labels, "inch"),label.width, na.rm = TRUE) ginch <- max(strwidth(labels, "inch"), na.rm = TRUE) goffset <- (max(linch + 0.1, ginch, na.rm = TRUE) + 0.1)/lheight gpos <- 1:nvar if (!(is.null(labels) )) { nmai <- par("mai") nmai[2L] <- nmai[4L] + max(linch + 0, ginch) + 0.1 par(mai = nmai) } if(!add ) {if(missing(xlim)) xlim=c(global.min-.05,global.max+.05) #this makes all the plots on the same scale plot.window(xlim = xlim, ylim = c(.9,nvar+1.1))} linch <- max(strwidth(labels)) lheight <- par("csi") # linch <- if (!is.null(labels)) max(strwidth(labels, "inch"),label.width, na.rm = TRUE) ginch <- max(strwidth(labels, "inch"), na.rm = TRUE) goffset <- (max(linch + 0.2, ginch, na.rm = TRUE) + 0.1)/lheight mtext(labels, side = 2, line = goffset, at = gpos, adj = 0, las = 2, ...) # if (!is.null(gdata)) { abline(h = gpos, lty = "dotted") for(i in 1:nvar) { #show the distributions abline(h = i, lty = "dotted") if(split) { h <- density(x[[i]],na.rm=TRUE) hor <- h$x vert <- h$y/(1 * max(h$y)) + nvar - i + 1 points(hor,vert,type="l",...) } } if(!add) axis(1) if(!add) box() if(omega){ text(result.df[,1],nvar:1,expression(omega[h])) #omega_h text(result.df[,3],nvar:1,expression(omega[t])) #omega total } if(alpha) {text(result.df[,2],nvar:1,expression(alpha)) } if(uni) {text(result.df[,4],nvar:1,"u")} if(is.null(main)) {if(split) { if(omega & alpha & uni ){ main=expression(paste("Split half distributions +", ~~~~ omega[h], ~~~~ alpha,~~~~ omega[t], " + unidim"))} else { if(omega & alpha &!uni) { main=expression(paste("Split half distributions +", ~~~~ omega[h], ~~~~ alpha, ~~~~ omega[t]))} else { if(omega & !alpha & uni) { main=expression(paste("Split half distributions +", ~~~~ omega[h], ~~~~ omega[t], " + unidim"))} else { if(omega & !alpha) { main=expression(paste("Split half distributions +", ~~~~ omega[h], ~~~~ omega[t]))} else { if(!omega & alpha) {main=expression(paste("Split half distributions +", ~~~~ alpha,))} } } } } } else { if(omega & alpha & uni) { main=expression(paste(" ", ~~~~ omega[h], ~~~~ alpha,~~~~ omega[t], " + unidim" ))} else { if(omega & !alpha & uni) { main=expression(paste( ~~~~ omega[h], ~~~~ omega[t], " + unidim"))} else { if(!omega & alpha) {main=expression(paste( ~~~~ alpha,))}} } } } title(main) } psych/R/make.keys.R0000644000176200001440000000607714313656076013640 0ustar liggesusers"make.keys" <- function(nvars,keys.list,item.labels=NULL,key.labels=NULL) { #use ncol rather than NCOL to return null if just numbers if(!is.null(ncol(nvars))) {item.labels <- colnames(nvars) nvars <- NCOL(nvars)} else { if(!is.numeric(nvars)) {item.labels <- nvars nvars <- length(item.labels)} } nkeys <- length(keys.list) keys <- matrix(rep(0,nvars*nkeys),ncol=nkeys) for (i in 1:nkeys) { if(!is.null(keys.list[[i]])) { list.i <- unlist(keys.list[[i]]) if((is.character(list.i)) && !is.null(item.labels)) { neg <- grep("-",list.i) list.i <- sub("-","",list.i) list.i <- match(list.i,item.labels) if(!any(is.na(neg))) list.i[neg] <- -list.i[neg]} keys[abs(list.i),i] <- sign(list.i ) } } if(!is.null(key.labels)) {colnames(keys) <- key.labels} else {colnames(keys) <- names(keys.list)} if(!is.null(item.labels)) {rownames(keys) <- item.labels} return(keys)} #written June 11, 2008 #revised Sept 15, 2013 to allow for symbolic keys #revised November 21, 2018 to allow null keys # # "keys2list" <- function(keys,sign=TRUE) { # keys.list <- list() # nkeys <- ncol(keys) # for (i in 1:nkeys) {temp <- rownames(keys)[which(keys[,i] < 0)] # if(sign && (length(temp) >0)) temp <- paste0("-",temp) # keys.list[[i]] <- c(rownames(keys)[which(keys[,i] > 0)],temp) # } # names(keys.list) <- colnames(keys) # keys.list} #Added July 9, 2017 "selectFromKeys" <- function(keys.list) { select <- sub("-","",unlist(keys.list)) select <- select[!duplicated(select)] return(select) } #Basically, the opposite of make.keys #Takes a keys matrix and converts it to a list structure (with negative signs appropriately placed) #9/10/16 #revised 6/10/18 to not change the order of keys #fixed 6/22/21 to properly reverse key items #fixed 7/07/21 to actually do this! #and patch again 7/17/21 to do it "keys2list" <- function(keys,sign=TRUE) { keys.list <- list() nkeys <- ncol(keys) for (i in 1:nkeys) { temp <- rownames(keys)[which(keys[,i] != 0)] wk <- which(keys[,i] < 0) if(length(wk)>0 ){ temp[temp%in%rownames(keys)[wk]] <- paste0("-",temp[temp%in%rownames(keys)[wk]])} keys.list[[i]] <- temp } #if(sign && (length(temp) >0)) temp <- paste0("-",temp) # keys.list[[i]] <- c(rownames(keys)[which(keys[,i] > 0)],temp) names(keys.list) <- colnames(keys) keys.list} "makePositiveKeys" <- function(keys.list,sign=FALSE) { nkeys <- length(keys.list) pos.list <- neg.list <- list() for(i in 1:nkeys) {temp <- keys.list[[i]] neg.pos <- grep("-",temp) if(length(neg.pos) >0) { pos.list[[i]] <- temp[-neg.pos] neg.list[[i]] <- temp[neg.pos]} else {pos.list[[i]] <- keys.list[[i]] neg.list[[i]] <-keys.list[[i]]} if(!sign) neg.list[[i]] <- sub("-","",neg.list[[i]]) } names(pos.list) <- names(keys.list) names(neg.list) <- paste0(names(keys.list),".neg") return(c(pos.list,neg.list)) } psych/R/lowerUpper.R0000644000176200001440000000172414403735751014105 0ustar liggesusers#written June 6, 2012 #modified March 14, 2023 #note that lower.tri and upper.tri return the matrix in a different order "lowerUpper" <- function(lower,upper=NULL,diff=FALSE) { if(is.null(upper)) {upper <- lower #return two from one upper[lower.tri(upper)] <- t(upper)[lower.tri(t(upper))] lower <- t(lower) lower[lower.tri(lower)] <- t(lower)[lower.tri(lower)] result <- list(lower=lower,upper=upper) } else { if(nrow(lower) !=ncol(lower)) {stop("lower matrix must be square")} if(nrow(upper) !=ncol(upper)) {stop("upper matrix must be square")} if(nrow(lower) !=ncol(upper)) {stop("lower and upper matrices must have the same dimensions")} result <- lower colnames(result) <- colnames(upper) rownames(result) <-rownames(lower) if(diff) upper <- lower - upper result [lower.tri(result)] <- upper[lower.tri(upper)] result <- t(result) diag(result) <- NA} return(result)} #revised Oct 6, 2013 to pick up row names and column names from the two matrices psych/R/table2matrix.R0000644000176200001440000000305712253362274014335 0ustar liggesusers"table2matrix" <- function(x,labs = NULL) { n <- sum(x) nrows <- dim(x)[1] ncol <- dim(x)[2] rowval <- as.numeric(rownames(x)) colval <- as.numeric(colnames(x)) xm <- matrix(NaN,nrow=n,ncol=2) k <- 1 for (rows in 1:nrows) { for (cols in 1:ncol) { case <- x[rows,cols] if(case>0) { for (cases in 1:case) { xm[k,1] <- rowval[rows] xm[k,2] <- colval[cols] k <- k+1} #end cases } } #end cols } #end rows if(!is.null(labs)) colnames(xm)<- labs return(xm) } "table2df" <- function(x,count=NULL, labs = NULL) { if(!is.null(count)) {xm.df <- bigtable2matrix(x,count,labs) } else { n <- sum(x) nrows <- dim(x)[1] ncol <- dim(x)[2] rowval <- as.numeric(rownames(x)) colval <- as.numeric(colnames(x)) xm <- matrix(NaN,nrow=n,ncol=2) k <- 1 for (rows in 1:nrows) { for (cols in 1:ncol) { case <- x[rows,cols] if(case>0) { for (cases in 1:case) { xm[k,1] <- rowval[rows] xm[k,2] <- colval[cols] k <- k+1} #end cases } } #end cols } #end rows if(!is.null(labs)) colnames(xm)<- labs xm.df <- data.frame(xm) } return(xm.df) } "bigtable2matrix" <- function(x,count,labs=NULL) { n <- dim(x)[1] ncol <- dim(x)[2] nrows <- sum(count) xm <- matrix(NaN,nrow=nrows,ncol=ncol) k <- 1 for (i in 1 :n) { for (j in k:(k+count[i]-1)) { for (values in 1:ncol) { xm[j,values] <- x[i,values] } } k <- k+count[i] } if(!is.null(labs)) {colnames(xm) <- labs} return(xm) } psych/R/bestScale.R0000644000176200001440000010462214444455475013656 0ustar liggesusers #Modified October, 2019 to have a variable number of items per scale #Modified Sept 8, 2019 to include weighted scoring #Modified April 15, 2019 to make structure somewhat cleaner #Minor fix December 22, 2021 to allow correctly for correlation input "bestScales" <- function(x, #the raw data or a correlation matrix criteria, # the criteria (name) to predict min.item=NULL,max.item=NULL,delta=0, #parameters for multiple solutions cut=.1,n.item =10,wtd.cut = 0,wtd.n=10, #just one solution / criteria n.iter =1,folds=1,p.keyed=.9, #how many bootstraps (n.iter) or folds overlap=FALSE,dictionary=NULL,check=TRUE,impute="none", log.p=FALSE, digits=2) { cl <- match.call() first <- TRUE #check for bad input -- the Mollycoddle option if(is.vector(criteria) & any( !(criteria %in% colnames(x)) )) { cat("\nCriteria names are incorrectly specified. Offending items are ", criteria[which(!(criteria %in% colnames(x)))],"\n") stop("I am sorry. I am stopping because you did not specify the criteria correctly. See above. ")} #further check if the dictionary is specified correctly if(!is.null(dictionary)) if(length(dictionary) < 1) stop("I am sorry. I am stopping because you did not specify the dictionary correctly. ") #check and delete variables with no variance (by default) if(check) {item.var <- apply(x,2,sd,na.rm=TRUE) bad <- which((item.var <= 0)|is.na(item.var)) if((length(bad) > 0) ) { for (baddy in 1:length(bad)) {message( "Item = ",colnames(x)[bad][baddy], " had no variance and was deleted")} x <- x[,-bad] } } #check various parameters # frac <- 1 if(folds > 1) {frac = 1/folds if(n.iter !=folds) {n.iter <- folds cat('Number of iterations set to the number of folds = ',n.iter) } } if(n.iter>1 && isCorrelation(x)) {n.iter <- 1 folds <- 1 cat("Number of iterations and number of folds set to 1 because the you are using a correlation matrix\n") } set.seed(NULL) old.seed <- .Random.seed[42] #we save this if we want to do k-fold cross validation #### #first, define function to be parallelized #mcmapply for parallel, mapply for debugging short <- function(i,x,n.obs,criteria,cut,n.item,impute,digits,dictionary,frac,log.p=FALSE,min.item,max.item) {#this is the function that is iterated multi.score <- NULL multi.cross <- NULL if(n.iter > 1) { if(!isCorrelation(x)) { ss <- (1:n.obs) if(frac==1) {ss <- sample(ss,n.obs,replace=TRUE) #bootstrap resampling ss is 'in the bag' } else { set.seed(old.seed) #this will take the same random sequence for this run so that we can do k fold ss <- sample(ss,n.obs,FALSE) # this is the 1:n.obs in the same random order for each of the k fold ss <- ss[-(((i-1)*frac*n.obs +1):(i*frac*n.obs))] #this drops frac cases out each trial } #the main function for finding the best items is right here #probably don't need to pass dictionary every time, since we rarely examine a single pass scores <- bScales(x[ss,],criteria=criteria,cut=cut, n.item =n.item,overlap=overlap,dictionary=dictionary,impute=impute,digits=digits,log.p=log.p) #These next two lines then try to find the optimal number of items for this pass #Not clear if we really need to do this for every iteration, perhaps just for the final pooled r if(!is.null(min.item)){ multi.score <- fastValidity(items=x[ss,],criteria=criteria,r=NULL,overlap=overlap, nlow=min.item,nhigh=max.item) multi.cross <- fastCrossValidity(new.items = x[-ss,],r=multi.score$r,item.order=multi.score$item.order,criteria=criteria, nlow=min.item,nhigh=max.item,delta=0,overlap=overlap,optimal.n=multi.score$optimal.unit.n,optimal.wtd.n = multi.score$optimal.wtd.n,)} else {multi.score <- NULL} } else {message("iterative solutions not possible for correlation matrices") n.iter <- 1 }} else { # a correlation matrix or n.iter = 1 scores <- bScales(x,criteria=criteria,cut=cut, n.item =n.item, overlap=overlap, dictionary=dictionary, impute=impute, digits=digits,log.p=log.p) if(!is.null(min.item)){ multi.score <- fastValidity(items=x,criteria=criteria,r=NULL,overlap=overlap, nlow=min.item,nhigh=max.item)} else {multi.score <- NULL} } key.list <- keys2list(scores$key) #this converts the -1 and 1s to a list with the variable names if(n.iter > 1) { cross <- scoreFast(key.list,x[-ss,],impute=impute,min=1,max=6) #why are these fixed values? validity <- diag(cor(cross,x[-ss,criteria],use="pairwise")) #now, add the two new functions FastValidity and FastCrossValidity #if we just want to do the optimal number of items on the summaries, we don't need to return the multi.scores here short.result <- list(r = c(scores$r,validity),key.list=key.list,R = scores$R,multi.scores=multi.score,multi.cross=multi.cross) } else {short.result <- scores # this is the list of various objects from bScales short.result$key.list <- key.list short.result$multi.score <- multi.score short.result$multi.cross <- multi.cross} class(short.result) <- cbind("psych","bestScales") return(short.result) } #this is the result from 1 iteration of all criteria ### ### #begin the main function #if criteria is a separate data frame, combine x and criteria #there are several alternative forms for criteria #it is either a column name of x, or it is a separate data.frame/matrix if(!is.null(dim(criteria))| (length(criteria) == NROW(x))) { x <- cbind(x,criteria) if(NCOL(criteria) > 1 ){criteria <- colnames(criteria) } else {criteria <- "criteria"} #criteria <- colnames(criteria) } n.obs <- nrow(x) #if((n.iter ==1)| first ) { #don't bother to parallelize, just do one trial if((n.iter ==1)) { first.result <- short(1,x,n.obs=n.obs,criteria=criteria,cut=cut,n.item=n.item,impute=impute,digits=digits,dictionary=dictionary,frac=1,min.item=min.item,max.item=max.item) first <- FALSE result <- first.result result$best.keys <- lapply(first.result$short.key,function(x) create.ordered.key(x)) } else {first.result <- NULL} #the case for n.iter > 1. We want to parallelize this because we are working pretty hard if(n.iter > 1) { result <- list() #This does the work across n.iter and across all criteria result <- mapply(short,c(1:n.iter),MoreArgs=list(x,n.obs=n.obs,criteria=criteria,cut=cut,n.item=n.item,impute=impute,digits=digits,dictionary=dictionary,frac=frac,min.item=min.item,max.item=max.item)) #result <- mcmapply(short,c(1:n.iter),MoreArgs=list(x,n.obs=n.obs,criteria=criteria,cut=cut,n.item=n.item,impute=impute,digits=digits,dictionary=dictionary,frac=frac,min.item=min.item,max.item=max.item)) #we have done the heavy lifting, now we need to prepare various results for output. if(delta > 0) { delta <- delta /sqrt(n.obs)} result <- organize.results(result,x,n.iter=n.iter,p.keyed=p.keyed,dictionary=dictionary,wtd.cut=wtd.cut,wtd.n = wtd.n,overlap=overlap,min.item=min.item,max.item=max.item,delta=delta) #makes the function a bit cleaner by doing this in its own function #save the keys and the summary } else { #we just did a single pass, the simple summaries are already there #but we should organize them a little if(!is.null(result$scores)){final.means <- colMeans(result$scores,na.rm=TRUE) final.sd <- apply(result$scores,2,sd,na.rm=TRUE)} else {final.means<- NA final.sd <- NA} if(length(criteria) > 1 ) {crit.mean <- colMeans(x[,criteria],na.rm=TRUE) crit.sd <- apply(x[,criteria],2,sd,na.rm=TRUE)} else { crit.mean <- mean(x[,criteria],na.rm=TRUE) crit.sd <- sd(x[,criteria],na.rm=TRUE)} if(any(is.na(final.means))) {result$final.stats <- data.frame(r=result$r,crit.m=crit.mean,crit.sd =crit.sd) } else { result$final.stats <- data.frame(mean=final.means,sd=final.sd,r=result$r,crit.m=crit.mean,crit.sd =crit.sd)} result$items <- NULL } result$Call <- cl result$first.result <- first.result class(result) <- c("psych","bestScales") return(result) } ##################### ####################################### #This function takes the results from short for many trials and then tries to make sense of them ###################################### organize.results <- function(result,x=NA,n.iter=1,p.keyed=.9,dictionary=NULL,wtd.cut,wtd.n,overlap=overlap, min.item=min.item,max.item=max.item,delta=delta) { #The results are n.iter lists, each with validity,keys,R, and the results from multi.score validity <- list() #validity is a list of elements repeated n.iter times #first are the validities #then are the keys #then are the item by criteria correlations #then the multi.score matrices keys <- R.list <- multi.valid <- multi.cross <- list() #take the list from all the iterations, and organize them in a more meaningful way for(i in (1:n.iter)) { validity[[i]] <- result[["r",i]] keys[[i]] <- result[["key.list",i]] R.list[[i]] <- result [["R",i,drop=FALSE]] if(!is.null(min.item)) { multi.valid [[i]] <- result[["multi.scores",i,drop=FALSE]] multi.cross [[i]] <- result[["multi.cross",i,drop=FALSE]] } } replicated.items <- bestReplicatedItems(keys) items <- list() item.mean <- list() best.keys <- list() criteria <- names(replicated.items) optimal.n <- optimal.wtd.n <- optimal.unit.deriv <- optimal.wtd.deriv <- optimal.cross.unit <- optimal.cross.wtd <- cross.n <- cross.wtd.n <- list() #we can find the optimal length for all criteria at once if(!is.null(min.item)) { for (i in 1:n.iter) { optimal.n[[i]] <- multi.valid[[i]][["optimal.unit.n"]] optimal.wtd.n[[i]] <- multi.valid[[i]][["optimal.wtd.n"]] optimal.unit.deriv[[i]] <- multi.valid[[i]][["optimal.unit.deriv"]] optimal.wtd.deriv[[i]] <- multi.valid[[i]][["optimal.wtd.deriv"]] optimal.cross.unit[[i]] <- multi.cross[[i]][["cross.unit"]] optimal.cross.wtd[[i]] <- multi.cross[[i]][["cross.wtd"]] cross.n[[i]] <- multi.cross[[i]][["cross.n"]] cross.wtd.n[[i]] <- multi.cross[[i]][["cross.wtd.n"]] } optimal.n.mean <-apply(matrix(unlist(optimal.n),nrow=n.iter,byrow=TRUE),2,median) optimal.wtd.mean <- apply(matrix(unlist(optimal.wtd.n),nrow=n.iter,byrow=TRUE),2,median) optimal.unit.deriv <- colMeans(matrix(unlist(optimal.unit.deriv),nrow=n.iter,byrow=TRUE)) optimal.wtd.deriv <- colMeans(matrix(unlist(optimal.wtd.deriv),nrow=n.iter,byrow=TRUE)) optimal.cross.unit <- colMeans(matrix(unlist(optimal.cross.unit),nrow=n.iter,byrow=TRUE)) optimal.cross.wtd <- colMeans(matrix(unlist(optimal.cross.wtd),nrow=n.iter,byrow=TRUE)) cross.n <- apply(matrix(unlist(cross.n),nrow=n.iter,byrow=TRUE),2,median) cross.wtd.n <- apply(matrix(unlist(cross.wtd.n),nrow=n.iter,byrow=TRUE),2,median) } #but we need to find item statistics one criteria at a time for(j in 1:length(criteria)) { #first, find the means and standard deviations for each selected item rep.item <- replicated.items[[j]][replicated.items[[j]] >= n.iter * p.keyed] if(length(rep.item)==0) rep.item <- replicated.items[[j]][1] # if(length(criteria) > 1 ) {for (i in 1:n.iter) { item.mean[[i]] <- R.list[[i]][names(replicated.items[[j]][replicated.items[[j]] > n.iter * p.keyed]),criteria[j]] } # } else { for (i in 1:n.iter) {item.mean[[i]] <- R.list[[i]][names(replicated.items[[j]][replicated.items[[j]] > n.iter * p.keyed])] } } for (i in 1:n.iter) {if(length(criteria) > 1) { item.mean[[i]] <- R.list[[i]][names(rep.item),criteria[j]]} else {item.mean[[i]] <- R.list[[i]][names(rep.item)]} } item.m <- matrix(unlist(item.mean),nrow=n.iter,byrow=TRUE) colnames(item.m) <- names(rep.item) means = colMeans(item.m,na.rm=TRUE) sds <- apply(item.m,2,sd,na.rm=TRUE) # Freq <- colSums(!is.na(item.m)) #This is the total number of items and just reflect n.iter Freq <- as.vector(rep.item) names(Freq) <- names(rep.item) #items [[criteria[j] ]] <- cbind(replicated.items[[j]],Freq=Freq,mean.r=means,sd.r = sds,dictionary[names(replicated.items[[j]]),]) items[[criteria[j]]] <- cbind(Freq,mean.r = means,sd.r = sds,dictionary[names(rep.item),,drop=FALSE]) items[[criteria[j]]] <- psychTools::dfOrder(items [[criteria[j] ]],"-mean.r",absolute=TRUE) #sort on the mean.r column # items[[criteria[j]]] <- items[[criteria[j]]][items[[criteria[j]]][,"Freq"] >= n.iter * p.keyed,] #now prepare the best.keys list if(!is.null(dim(items[[criteria[[j]] ]] ))){ direction <- sign(items[[criteria[[j]] ]][,"mean.r"]) direction <- as.matrix(direction) rownames(direction) <- rownames(items[[criteria[[j]] ]]) count <- items[[criteria[[j]]]][,"Freq"]} else { if(!is.null(items[[criteria[[j]] ]])) { # items [[criteria[j] ]] <- cbind(Freq=Freq,mean.r=means,sd.r = sds,dictionary[names(replicated.items[[j]]),]) items [[criteria[j] ]] <- cbind(Freq=Freq,mean.r=means,sd.r = sds,dictionary[names(replicated.items[[j]]),]) direction <- sign(items[[criteria[[j]] ]]["mean.r"]) names(direction) <- names(Freq) direction <- as.matrix(direction) count <- items[[criteria[[j]]]][1] } else {count <- 0} } count <- count >= n.iter*p.keyed if(sum(count,na.rm=TRUE) > 0) { best.keys[[j]] <- rownames(direction)[count] direction <- direction[count,drop=FALSE] if(length(direction)> 1) { best.keys[[j]][direction < 0] <- paste0("-", best.keys[[j]][direction < 0]) } if((length(direction) ==1) && (!is.na(direction))) { best.keys[[j]][direction < 0] <- paste0("-", best.keys[[j]][direction < 0]) } } else { best.keys[[j]] <- NA } } #Find the mean, zero order correlation of each item with each criteria #We do this by pooling the data in R.list mean.raw.r <- matrix(unlist(R.list),ncol=NCOL(R.list[[1]]) * NROW(R.list[[1]]),byrow=TRUE ) sd.raw.r <- apply(mean.raw.r,2,sd,na.rm=TRUE) sd.raw.r <- matrix(sd.raw.r,ncol=length(criteria)) mean.raw.r <- matrix(colMeans(mean.raw.r,na.rm=TRUE),ncol=length(criteria)) if(length(criteria) == 1) {colnames(mean.raw.r) <- criteria rownames(mean.raw.r) <- names(R.list[[1]])} else {colnames(mean.raw.r) <- colnames(R.list[[1]]) rownames(mean.raw.r) <- rownames(R.list[[1]])} final.mean.r <- mean.raw.r mean.raw.r[abs(mean.raw.r) < wtd.cut] <- 0 #now, drop all except the wtd.n items ny <- length(criteria) nvar <- NROW(mean.raw.r) if(ny > 1 ) {ord <- apply(abs(mean.raw.r[,criteria]),2,order,decreasing=TRUE) for (i in 1:ny) {mean.raw.r[ord[(wtd.n+1):nvar,i],criteria[i]] <- 0 } } else { ord <- order(abs(mean.raw.r),decreasing=TRUE) for (i in 1:ny) {mean.raw.r[ord[(wtd.n+1):nvar]] <- 0 } } N.wtd <- colSums(abs(mean.raw.r) >0) if(length(best.keys) == length(criteria)) names(best.keys) <- criteria #Find the results for best keys final.scale <- scoreFast(best.keys,x) #these are the unit weighted final.raw.scale <- scoreWtd(mean.raw.r,x) #these are the zero order weighted scores final.raw.valid <- diag(cor(final.raw.scale,x[,criteria,drop=FALSE],use="pairwise") ) final.valid <- diag(cor(final.scale, x[,criteria,drop=FALSE],use="pairwise") ) final.means <- colMeans(final.scale,na.rm=TRUE) final.sd <- apply(final.scale,2,sd,na.rm=TRUE) crit.mean <- colMeans(x[,criteria,drop=FALSE],na.rm=TRUE) crit.sd <- apply(x[,criteria,drop=FALSE],2,sd,na.rm=TRUE) result.df <- data.frame(matrix(unlist(validity),ncol=2*length(criteria),byrow=TRUE)) colnames(result.df) <-c(paste("derivation",criteria),paste("validation",criteria)) if(!is.null(min.item)) { multi.derivation.df <- data.frame(n=optimal.n.mean,unit=optimal.unit.deriv,n.wtd=optimal.wtd.mean,wtd=optimal.wtd.deriv,valid.n=cross.n,valid.unit=optimal.cross.unit,valid.wtd.n = cross.wtd.n,valid.wtd=optimal.cross.wtd) rownames(multi.derivation.df ) <- criteria } else {multi.derivation.df <- NULL} ncriteria <- length(criteria) if(!is.null(min.item)){ final.multi.validities <- fastValidity(x,criteria,r=final.mean.r, nlow=min.item, nhigh=max.item,overlap=overlap) final.order <- final.multi.validities$item.order final.item.valid.list <- list() for (j in 1 : ny ) {if(!is.null(dictionary)) {final.item.valid.list[[criteria[j]]] <- data.frame(item=rownames(final.mean.r)[final.order[1:max.item,j]] ,r=final.mean.r[final.order[1:max.item,j],j],unit = final.multi.validities$unit.deriv[,j],wtd=final.multi.validities$wtd.deriv[,j],dictionary[rownames(final.mean.r)[final.order[1:max.item,j]],]) } else { final.item.valid.list[[criteria[j]]] <- data.frame(item=rownames(final.mean.r)[final.order[1:max.item,j]],r=final.mean.r[final.order[1:max.item,j],j],unit = final.multi.validities$unit.deriv[,j],wtd=final.multi.validities$wtd.deriv[,j])} } } else {final.multi.validities <- NULL final.item.valid.list <- NULL} #now, organize the output object into a reasonable order result <- list() results <- list() #to hold things we don't actually want to return #now get out the items and incremental validities for each scale results$means = colMeans(result.df,na.rm=TRUE) results$sd <- apply(result.df,2,sd,na.rm=TRUE) result$summary <- data.frame(derivation.mean= results$means[1:ncriteria],derivation.sd = results$sd[1:ncriteria],validation.m=results$mean[(ncriteria+1):(2*ncriteria)], validation.sd =results$sd[(ncriteria+1):(2*ncriteria)],final.valid = final.valid,final.wtd=final.raw.valid,N.wtd=N.wtd ) rownames(result$summary) <- criteria #result <- list(validity = result.df,multi.validities=multi.derivation.df,items=items,replicated.items =replicated.items,keys = keys,final.mean.r =final.mean.r,multi.validities=multi.valid) result$optimal <- multi.derivation.df result$best.keys <- best.keys result$weights <- mean.raw.r result$final.item.list <- final.item.valid.list result$multi.validities <- final.multi.validities result$items <- items if(!is.null(min.item)) { result$optimal.keys <- optimal.keys(final.multi.validities,delta=delta) result$optimal.weights <- optimal.weights(final.multi.validities,delta=delta) n.optimal.unit <- sapply(result$optimal.keys,length) n.optimal.wtd <- apply(result$optimal.weights,2,function(x) sum(abs(x) > 0) ) result$optimal <- data.frame(result$optimal,n.final=n.optimal.unit,n.wtd.final = n.optimal.wtd) } result$stats <- data.frame(mean=results$means,se=results$sd) # result$final <- final.valid result$scores <- final.scale result$wtd.scores <- final.raw.scale result$final.stats <- data.frame(mean=final.means,sd=final.sd,r=final.valid,crit.mean = crit.mean,crit.sd=crit.sd,final.wtd=final.raw.valid,N.wtd=N.wtd) # result$sd.weights <- sd.raw.r # result$final.raw <- final.raw.valid return(result) } ########################################### #end of organize results ########################################### #This one actually does the work -- but should not process n.item at this point "bScales" <- function(x,criteria,cut=.1,n.item =10, overlap=FALSE,dictionary=NULL,impute="median",digits=2,log.p=FALSE) { #created 20/2/14 #find the scales based upon the items that most correlate with a criteria #pure dust bowl empiricism #modified 13/3/15 to handle the problem of missing item labels #Completely redone June 2017 to allow for raw data and bootstrapping ## #begin the main function ##Basically two cases: #a correlation matrix is provided and we do basic matrix algebra #or raw data are provided (getting ready to do bootstrapping) and we find just the necessary correlations nvar <- ncol(x) n.item <- min(n.item,nvar-1) if(isCorrelation(x) | (all(colnames(x) %in% rownames(x)))) {r <- x # case 1 raw <- FALSE nvar <- NROW(r) } else { #case 2 y <- x[,criteria] if(log.p) {r <- log(corr.test(x,y)$p)} else { r <- cor(x,y,use="pairwise")} colnames(r) <- criteria x <- as.matrix(x) raw <- TRUE n.obs <- NROW(x)} #don't actually need to have a square matrix ny <- length(criteria) nc <- length(cut) ni <- length(n.item) #number of items per scale to find ord.name <- NULL if(length(cut) == 1) cut <- rep(cut,ny) if(length(n.item) == 1) n.item <- rep(n.item,ny) # #We have the correlations with the criteria, we can #this next part just finds the cut values to use if(!overlap) {r[criteria,criteria] <- 0} else {for(i in 1:ny) r[criteria[i],criteria[i]] <- 0} if(ny > 1 ) {ord <- apply(abs(r[,criteria]),2,order,decreasing=TRUE) for (i in 1:ny) {cut[i] <- max(cut[i],abs(r[ord[n.item[i],i],criteria[i]])) } } else { ord <- order(abs(r[,criteria]),decreasing=TRUE) for (i in 1:ny) {cut[i] <- max(cut[i],abs(r[ord[n.item[i]+1],criteria])) } } # cut has been adjusted #The unit weightsr key <- matrix(0,ncol=ny,nrow=nvar) key[t(t(r[,criteria]) >= cut)] <- 1 key[t(t(r[,criteria]) <= -cut)]<- -1 rownames(key) <- rownames(r) colnames(key) <- criteria k <- key #this just gets it to be a matrix of the right size and names #colnames(key) <- paste(criteria,"S",sep=".") #colnames(key) <- criteria #now, drop those items from the keys that are not used used <- rowSums(abs(key)) key <- key[used > 0,,drop=FALSE] x <- x[,used >0,drop=FALSE] if(NCOL(x) < 1) stop("No items met the criteria. Try changing the cut value.") #now, if we have raw data, find the correlation of the composite scale with the criteria #if we have raw data, then we find the scales from the data if(raw) { #case 2 #score <- matrix(NA,ncol=ny,nrow=nrow(x)) #for (i in (1:ny)) { # score[,i] <- rowSums(t((key[,i])* t(x)),na.rm=TRUE) # } score <- scoreFast(key,x,impute=impute,min=1,max=6) #min and max should not be fixed values R <- diag(cor(score,y,use="pairwise")) #the validities re <- r[,criteria] ni <- colSums(abs(key)) } else { #case 1 (from a correlation matrix) score <- NULL r <- r[,used > 0,drop=FALSE] k<- key #get the matrix dimensions right ss <- rownames(key) if(any(is.na(r))) {#Are there any bad values for(i in 1:ny) {#key[,i] <- findBad(key[,i],r) #Drop the bad items from any scoring key k[,i] <- colSums((key[,i]) * (r[ss,ss]),na.rm=TRUE)} #replace matrix addition with a colSums k <- t(k) } else {#otherwise, don't bother C <- t(t(key) %*% t(r[criteria,,drop=FALSE])) #criterion covariance V <- t(key) %*% r[ used > 0,] %*% key #predictor variance # k <- t(t(key) %*% t(r[criteria,,drop=FALSE])) #we can do the matrix multiply because there are no bad data } # V <- t(k) %*% key #this is the covariance of the criteria with criteria # C <- k[criteria,] if(ny < 2) {re <- r[criteria,] R <- C/sqrt(V)} else { # R <- diag(C/sqrt(V)) R <- C /sqrt(diag(V)) #re <- diag(k[criteria,])/sqrt(diag(C)) } ni <- colSums(abs(key)) #R <- cov2cor(C) r <- t(r) re <- r[,criteria] } short.key <- list() value <- list() #R is the correlation with the criterion #re is the correlations of each item with the criteria for(i in 1:ny) {short.key[[criteria[i]]] <- round(key.value(key[,i,drop=FALSE],r),digits=digits) #actually we should not bother with the dictionary here, just at the summary level if(!is.null(dictionary)) {if(!is.factor(dictionary)) {temp <- lookup(rownames(short.key[[criteria[i]]]),dictionary) #this next line needs to be rethought -- the merge command is very slow value[[criteria[[i]]]] <- merge(short.key[[i]],temp,by="row.names",all.x=TRUE,sort=FALSE) rownames( value[[criteria[[i]]]]) <- value[[criteria[[i]]]][,1] value[[criteria[[i]]]] <- value[[criteria[[i]]]][-1] #this looks weird but is because there is an extra name ord <- order(abs(value[[criteria[[i]]]][[criteria[[i]]]]),decreasing=TRUE) value[[criteria[[i]]]] <- value[[criteria[[i]]]][ord,] } }} bScales.results <- list(r=R,n.items=ni,R=re,cut=cut,short.key=short.key,value=value,key=key,ordered=ord.name,scores=score) class(bScales.results) <- c("psych","bestScales") #This is the solution for one pass return(bScales.results) } ################################ #various minor functions used in bestScales #first, declare a function to identify the bad items and drop them from the keys findBad <- function(key,r) { ss <- abs(key) > 0 rss <- r[ss,ss] if(any(is.na(rss))){ #some of these are bad n.bad <- apply(rss,1,function(x) sum(is.na(x))) key[names(which.max(n.bad))] <- 0 findBad(key,r)} return(key) } key.value <- function(key,r) { kn <- names(key[abs(key[,1]) > 0,1]) if(is.null(kn)) kn <- names(which(abs(key[,1]) > 0)) cn <- colnames(key) ord <- order(abs(r[kn,cn]),decreasing=TRUE) kn <- kn[ord] result <- r[kn,cn,drop=FALSE] return(result) } #added the test for length>1 for the null case 4/16/23 create.ordered.key <- function(x) { if(length(x)>1 ){ for (i in 1: length(x)) { if(sign(x[i])<0 ) rownames(x)[i] <- paste0("-",rownames(x)[i]) }} return(rownames(x)) } "finalsummary" <- function(r,keys) { } "bestReplicatedItems" <- function( L) { n.iters <- length(L) # n.vars <- NCOL(L[[1]]) vars <- names(L[[1]]) n.vars<- length(vars) item.nums <- list() one.criterion <- list() for (j in 1:n.vars) { #do this over the criteria for (i in 1:n.iters) {one.criterion[[i]] <- L[[i]][j] } select <- sub("-","",unlist(one.criterion)) item.nums[[vars[j]]] <- sort(table(select),decreasing=TRUE) nam.items <- names(item.nums) } item.nums <- as.vector(item.nums) names(item.nums) <- nam.items return(item.nums) } predict_bestScales <- function(object,data,new.data) {keys <- object$keys if (is.null(keys)){ keys<- object$key.list keys <- make.keys(data,keys) } stats <- describe(data,fast=TRUE,ranges=FALSE) old.mean <- stats$mean old.sigm <- stats$sd z.data <- scale(new.data,center=stats$mean,scale=stats$sd) z.data[is.na(z.data)] <- 0 predicted <- z.data %*% keys return(predicted) } #does not do what I want predict_wtdScales <- function(object,data,new.data) {weights <- object$weights stats <- describe(data,fast=TRUE,ranges=FALSE) old.mean <- stats$mean old.sigm <- stats$sd predicted <- scoreWtd(weights,new.data) predicted <- scale(predicted,center=stats$mean,scale=stats$sd) return(predicted) } ##### #print.psych.bestscales is called from the psych.print function print_psych.bestScales <- function(x,digits=2,short=NULL,...) { cat("\nCall = ") print(x$Call) if(!is.null(x$items)) { print(x$summary,digits=digits) if(!is.null(x$optimal)) { cat("\n Optimal number of items, derivation and cross validation\n") print(x$optimal,digits=digits) } # x$replicated.items items <- x$items size <- NCOL(items[[1]]) nvar <- length(items) if(is.null(x$optimal)) { cat("\n Best items on each scale with counts of replications\n") for(i in 1:nvar) { cat("\n Criterion = ",names(items[i]),"\n") if(length(items[[i]]) > 3) { temp <- data.frame(items[[i]]) temp[2:3] <- round(temp[2:3],digits)} else{temp <- items[[i]] temp[2:3] <- round(temp[2:3],digits) } print(temp) }} else { items <- x$final.item.list cat("\n Best items on each scale with cumulative validities\n") for(i in 1:nvar) { cat("\n Criterion = ",names(items[i]),"\n") temp <- data.frame(items[[i]]) temp <- temp[-1] if(length(items[[i]]) > 3) { temp[1:3] <- round(temp[1:3],digits)} else{temp <- items[[i]] temp[1:3] <- round(temp[1:3],digits) } print(temp) }} # print(items) } else { df <- data.frame(correlation=x$r,n.items = x$n.items) cat("The items most correlated with the criteria yield r's of \n") print(round(df,digits=digits)) if(length(x$value) > 0) {cat("\nThe best items, their correlations and content are \n") print(x$value) } else {cat("\nThe best items and their correlations are \n") for(i in 1:length(x$short.key)) {print(round(x$short.key[[i]],digits=digits))} } } } #end of print.psych.bestScales #These next two functions were added in October, 2019 to allow for finding the maximum value of cross.validated as a function of n.item #ValidityList <- mapply(FastValidity,c(1:ny),MoreArgs=list(nlow=nlow,nhigh=nhigh)) #probably not that helpful ### Find predictions and validities for scales from nlow to nhigh number of items fastValidity <- function(items, criteria,r=NULL,nlow,nhigh,overlap) { #r and criteria are found before ny <-length(criteria) if(is.null(r)) r <- cor(items,items[criteria],use="pairwise") wtd.validity <- unit.validity <- matrix(NA,nrow=nhigh,ncol= ny) if(!overlap) {r[criteria,criteria] <- 0} else {for(i in 1:ny) r[criteria[i],criteria[i]] <- 0} colnames(unit.validity) <- criteria colnames(wtd.validity) <- criteria item.min <- min(items,na.rm=TRUE) item.max <- max(items,na.rm = TRUE) if(item.max < 10) { item.range.correction <- item.max - item.min + 1} else item.range.correction <- 7 #this is the case where we include some weird items, like age for(scale in 1:ny) { if(ny > 1 ) {ord <- apply(abs(r[,criteria,drop=FALSE]),2,order,decreasing=TRUE) } else { ord <- matrix(order(abs(r[,criteria,drop=FALSE]),decreasing=TRUE)) rownames(ord) <- rownames(r) colnames(ord) <- criteria } abs.item <- t(t(items) * sign(r[,scale]) + sign(r[,scale] < 0) *item.range.correction ) #this gets all the items to be scored and adds in the max - min + 1 wtd.item <- t(t(items) * r[,scale] + sign(r[,scale] < 0) *item.range.correction ) #these are the wtd item scores for (j in nlow: nhigh){ # temp <- abs.item[,ord[1:j,scale,drop=FALSE]] # wtd.temp <- wtd.item[,ord[1:j,scale,drop=FALSE]] if(j > 1) {scores <- rowMeans( abs.item[,ord[1:j,scale,drop=FALSE]] ,na.rm=TRUE) wtd.scores <- rowMeans(wtd.item[,ord[1:j,scale,drop=FALSE]],na.rm=TRUE) } else {scores <- abs.item[,ord[1:j,scale,drop=FALSE]] wtd.scores <- wtd.item[,ord[1:j,scale,drop=FALSE]]} unit.validity[j,scale] <- cor(scores,items[,criteria[scale]],use="pairwise") wtd.validity [j,scale] <- cor(wtd.scores,items[,criteria[scale]],use="pairwise") } } optimal.unit.n <- apply(unit.validity,2,which.max) optimal.wtd.n <- apply(wtd.validity,2,which.max) optimal.unit.valid <- apply(unit.validity,2,max) optimal.wtd.valid <- apply(wtd.validity,2,max) result <- list(optimal.unit.n=optimal.unit.n, optimal.wtd.n = optimal.wtd.n, optimal.unit.deriv=optimal.unit.valid, optimal.wtd.deriv=optimal.wtd.valid, unit.deriv=unit.validity,wtd.deriv = wtd.validity,item.order = ord,r=r,item.range.correction=item.range.correction) return(result) } #This takes the item order from fastValidity and applies it to a new data set,using the old correlations fastCrossValidity <- function(new.items,r,item.order,criteria,nlow,nhigh,overlap,optimal.n,optimal.wtd.n,delta=0,item.range.correction=0) { #r and order are from the derivation set ny <-length(criteria) #r and item.order are from the derivation sample wtd.cross <- unit.cross <- matrix(NA,nrow=nhigh,ncol= ny) if(!overlap) {r[criteria,criteria] <- 0} else {for(i in 1:ny) r[criteria[i],criteria[i]] <- 0} colnames(unit.cross) <- criteria colnames(wtd.cross) <- criteria ord <- item.order for(scale in 1:ny) { abs.item <- t(t(new.items) * sign(r[,scale]) + sign(r[,scale] < 0) * item.range.correction) #this gets all the items to be scored wtd.item <- t(t(new.items) * r[,scale] + item.range.correction) #these are the wtd item scores for (j in nlow: nhigh){ temp <- abs.item[,ord[1:j,scale,drop=FALSE]] wtd.temp <- wtd.item[,ord[1:j,scale,drop=FALSE]] if(j > 1) {scores <- rowMeans(temp[,1:j],na.rm=TRUE) wtd.scores <- rowMeans(wtd.temp[,1:j],na.rm=TRUE) } else {scores <- abs.item[,ord[1:j,scale,drop=FALSE]] wtd.scores <- wtd.item[,ord[1:j,scale,drop=FALSE]]} unit.cross[j,scale] <- cor(scores,new.items[,criteria[scale]],use="pairwise") wtd.cross [j,scale] <- cor(wtd.scores,new.items[,criteria[scale]],use="pairwise") } cross.unit.valid <- apply(unit.cross,2,max) cross.wtd.valid <- apply(wtd.cross,2,max) # temp <- apply(unit.cross,2,function(x) which(x >= (max(x) - delta))) #if(is.list(temp)) {cross.unit.n <- sapply(temp,function(x) x[1],simplify=TRUE) } else {cross.unit.n <- temp[1]} cross.unit.n <- apply(unit.cross,2,which.max) cross.wtd.n <- apply(wtd.cross,2,which.max) optimal.cross.unit <- diag(unit.cross[optimal.n,1:ny,drop=FALSE]) optimal.cross.wtd <-diag( wtd.cross[optimal.wtd.n,1:ny,drop=FALSE]) } result <- list(unit.cross=unit.cross, wtd.cross = wtd.cross, cross.unit= optimal.cross.unit, cross.wtd=optimal.cross.wtd, cross.n=cross.unit.n, cross.wtd.n=cross.wtd.n) return(result) } optimal.keys <- function(L,delta=0) { #take the information from multi.validities and create a keys.list and a weights matrix criteria <- names(L[["optimal.unit.n"]]) n <-L[["optimal.unit.n"]] unit.cross <- L[["unit.deriv"]] if(delta>0) { temp <- apply(unit.cross,2,function(x) which(x >= (max(x,na.rm=TRUE) - delta))) if(is.list(temp)) {n <- sapply(temp,function(x) x[1],simplify=TRUE) } else {if (is.vector(temp)) {n <- temp} else {n <- temp[1,,drop=FALSE]}} } item.order <- L [["item.order"]] var.names <- rownames(L[["r"]]) r <- L [["r"]] keys <- direction <- list() for (j in 1:length(criteria)) { keys[[j]] <- var.names[item.order[1:n[j],j]] direction <- sign(r[item.order[1:n[j]],j]) keys[[j]][direction <0 ] <- paste0("-",keys[[j]][direction < 0]) } names(keys) <- criteria return(keys) } optimal.weights <- function(L,delta=0) { #take the information from multi.validities and create a keys.list and a weights matrix criteria <- names(L[["optimal.unit.n"]]) n <-L[["optimal.unit.n"]] wtd.cross <- L[["wtd.deriv"]] if(delta>0) { temp <- apply(wtd.cross,2,function(x) which(x >= (max(x,na.rm=TRUE) - delta))) if(is.list(temp)) {n <- sapply(temp,function(x) x[1],simplify=TRUE) } else {if (is.vector(temp)) {n <- temp} else {n <- temp[1,,drop=FALSE]}} } item.order <- L [["item.order"]] var.names <- rownames(L[["r"]]) weights <- L [["r"]] cut <- abs(diag(weights[diag(item.order[n,,drop=FALSE]),,drop=FALSE])) weights[t(abs(t(weights)) < cut)] <- 0 return(weights) } psych/R/structure.list.R0000644000176200001440000000161711141172325014740 0ustar liggesusers"structure.list" <- function(nvars,f.list,f=NULL,f.labels=NULL,item.labels=NULL) { nfactors <- length(f.list) fmodel <- matrix(rep(0,nvars*nfactors),ncol=nfactors) for (i in 1:nfactors) { if(!is.null(f.list[[i]])) { list.i <- unlist(f.list[[i]]) fmodel[abs(list.i),i] <- paste(f,letters[i],list.i,sep="") } } if(!is.null(f.labels)) {colnames(fmodel) <- f.labels} else {colnames(fmodel) <- names(f.list)} if(!is.null(item.labels)) {rownames(fmodel) <- item.labels} return(fmodel)} #written Jan 22, 2009 "phi.list" <- function(nf,f.list,f.labels=NULL) { nkeys <- length(f.list) phi <- diag(1,nf,nf) for (i in 1:nkeys) { list.i <- unlist(f.list[[i]]) phi[list.i,i] <- paste("r",letters[i],letters[list.i],sep="") } if(!is.null(f.labels)) {colnames(phi) <- f.labels} else {colnames(phi) <- paste("F",1:nf,sep="")} rownames(phi) <- colnames(phi) return(phi)} #written Jan 22, 2009psych/R/factor.rotate.R0000644000176200001440000000136411764663336014522 0ustar liggesusers"factor.rotate" <- function(f,angle,col1=1,col2=2,plot=FALSE,...) { #hand rotate two factors from a loading matrix #see the GPArotation package for much more elegant procedures if (!is.matrix(f) ) {f <-f$loadings} nvar<- dim(f)[2] if(!is.matrix(f)) {if(!is.data.frame(f)) {stop("f must be either a data frame or a matrix")} else {f <- as.matrix(f)} } rot<- diag(1,nvar,nvar) theta<- pi*angle/180 rot[col1,col1]<- cos(theta) rot[col2,col2]<- cos(theta) rot[col1,col2]<- -sin(theta) rot[col2,col1]<- sin(theta) result <- f %*% rot if(plot) {fa.plot(result,...) abline(a=0,b=tan(-theta),lty="dashed") abline(a=0,b=tan(-theta+ pi/2),lty="dashed") } return(result) } psych/R/pairs.panels.R0000744000176200001440000002232114222640413014322 0ustar liggesusers#Adapted from the help for pairs #modified December 15, 2011 to add the rug option #further modified March 30, 2012 to add the method of correlation option (suggested by Carsten Dormann). #Fixed a bug in show.points on March 18, 2017 (reported by Matthew Labrum) #August 11, 2017 Added confidence intervals and adjust the histogram so it lines up with wht data points (suggested by Julan Martin) #by moving all the little functions to be outside the main function, this allows method and rug to be passed to these lower order functions. #this should allow for somewhat cleaner code for the other functions #modified March 15, 2015 to add the ability to control the size of the correlation separately from the cex variable in the points #also added the ability to set the number of breaks in the histograms "pairs.panels" <- function (x, smooth = TRUE, scale = FALSE, density=TRUE,ellipses=TRUE,digits = 2, method="pearson",pch = 20,lm=FALSE,cor=TRUE,jiggle=FALSE,factor=2,hist.col="cyan",show.points=TRUE,rug=TRUE, breaks="Sturges", cex.cor = 1 ,wt=NULL,smoother=FALSE,stars=FALSE,ci=FALSE,alpha=.05,...) #combines a splom, histograms, and correlations { #First define all the "little functions" that are internal to pairs.panels. This allows easier coding later "panel.hist.density" <- function(x,...) { usr <- par("usr"); on.exit(par("usr")) # par(usr = c(usr[1]-abs(.05*usr[1]) ,usr[2]+ abs(.05*usr[2]) , 0, 1.5) ) par(usr = c(usr[1] ,usr[2] , 0, 1.5) ) tax <- table(x) if(length(tax) < 11) {breaks <- as.numeric(names(tax)) y <- tax/max(tax) interbreak <- min(diff(breaks))*(length(tax)-1)/41 rect(breaks-interbreak,0,breaks + interbreak,y,col=hist.col) } else { h <- hist(x,breaks=breaks, plot = FALSE) breaks <- h$breaks; nB <- length(breaks) y <- h$counts; y <- y/max(y) rect(breaks[-nB], 0, breaks[-1], y,col=hist.col) } if(density) { tryd <- try( d <- density(x,na.rm=TRUE,bw="nrd",adjust=1.2),silent=TRUE) if(!inherits(tryd,"try-error")) { d$y <- d$y/max(d$y) lines(d)}} if(rug) rug(x) } "panel.cor" <- function(x, y, prefix="",...) { usr <- par("usr"); on.exit(par("usr")) par(usr = c(0, 1, 0, 1)) if(is.null(wt)) { r <- cor(x, y,use="pairwise",method=method)} else { r <- cor.wt(data.frame(x,y),w=wt[,c(1:2)])$r[1,2]} txt <- format(c(round(r,digits), 0.123456789), digits=digits)[1] txt <- paste(prefix, txt, sep="") if(stars) {pval <- r.test(sum(!is.na(x*y)),r)$p symp <- symnum(pval, corr = FALSE,cutpoints = c(0, .001,.01,.05, 1), symbols = c("***","**","*"," "),legend=FALSE) txt <- paste0(txt,symp)} cex <- cex.cor*0.8/(max(strwidth("0.12***"),strwidth(txt))) if(scale) {cex1 <- cex * abs(r) if(cex1 < .25) cex1 <- .25 #otherwise they just vanish text(0.5, 0.5, txt, cex = cex1) } else { text(0.5, 0.5, txt,cex=cex)} } "panel.smoother" <- function (x, y,pch = par("pch"), col.smooth = "red", span = 2/3, iter = 3, ...) { # usr <- par("usr"); on.exit(par(usr)) # par(usr = c(usr[1]-abs(.05*usr[1]) ,usr[2]+ abs(.05*usr[2]) , usr[3],usr[4]) ) #doensn't affect the axis correctly xm <- mean(x,na.rm=TRUE) ym <- mean(y,na.rm=TRUE) xs <- sd(x,na.rm=TRUE) ys <- sd(y,na.rm=TRUE) r = cor(x, y,use="pairwise",method=method) if(jiggle) { x <- jitter(x,factor=factor) y <- jitter(y,factor=factor)} if(smoother) {smoothScatter(x,y,add=TRUE, nrpoints=0)} else {if(show.points) points(x, y, pch = pch, ...)} ok <- is.finite(x) & is.finite(y) if (any(ok)) { if(smooth & ci) { lml <- loess(y~x ,degree=1,family="symmetric") tempx <- data.frame(x = seq(min(x,na.rm=TRUE),max(x,na.rm=TRUE),length.out=47)) pred <- predict(lml,newdata=tempx,se=TRUE ) if(ci) { upperci <- pred$fit + confid*pred$se.fit lowerci <- pred$fit - confid*pred$se.fit polygon(c(tempx$x,rev(tempx$x)),c(lowerci,rev(upperci)),col=adjustcolor("light grey", alpha.f=0.8), border=NA) } lines(tempx$x,pred$fit, col = col.smooth, ...) #this is the loess fit } else {if(smooth) lines(stats::lowess(x[ok],y[ok],f=span,iter=iter),col=col.smooth) }} if(ellipses) draw.ellipse(xm,ym,xs,ys,r,col.smooth=col.smooth,...) #this just draws the ellipse } "panel.lm" <- function (x, y, pch = par("pch"), col.lm = "red", ...) { ymin <- min(y) ymax <- max(y) xmin <- min(x) xmax <- max(x) ylim <- c(min(ymin,xmin),max(ymax,xmax)) xlim <- ylim if(jiggle) { x <- jitter(x,factor=factor) y <- jitter(y,factor=factor)} if(smoother) {smoothScatter(x,y,add=TRUE, nrpoints=0)} else {if(show.points) {points(x, y, pch = pch,ylim = ylim, xlim= xlim, ...)}}# if(show.points) points(x, y, pch = pch,ylim = ylim, xlim= xlim,...) ok <- is.finite(x) & is.finite(y) if (any(ok)) { lml <- lm(y ~ x) if(ci) { tempx <- data.frame(x = seq(min(x,na.rm=TRUE),max(x,na.rm=TRUE),length.out=47)) pred <- predict.lm(lml,newdata=tempx,se.fit=TRUE) #from Julian Martins upperci <- pred$fit + confid*pred$se.fit lowerci <- pred$fit - confid*pred$se.fit polygon(c(tempx$x,rev(tempx$x)),c(lowerci,rev(upperci)),col=adjustcolor("light grey", alpha.f=0.8), border=NA) } if(ellipses) { xm <- mean(x,na.rm=TRUE) ym <- mean(y,na.rm=TRUE) xs <- sd(x,na.rm=TRUE) ys <- sd(y,na.rm=TRUE) r = cor(x, y,use="pairwise",method=method) draw.ellipse(xm,ym,xs,ys,r,col.smooth=col.lm,...) #just draw the ellipse } abline(lml, col = col.lm, ...) } } "draw.ellipse" <- function(x=0,y=0,xs=1,ys=1,r=0,col.smooth,add=TRUE,segments=51,...) { #based upon John Fox's ellipse functions angles <- (0:segments) * 2 * pi/segments unit.circle <- cbind(cos(angles), sin(angles)) if(!is.na(r)) { if (abs(r)>0 )theta <- sign(r)/sqrt(2) else theta=1/sqrt(2) shape <- diag(c(sqrt(1+r),sqrt(1-r))) %*% matrix(c(theta,theta,-theta,theta),ncol=2,byrow=TRUE) ellipse <- unit.circle %*% shape ellipse[,1] <- ellipse[,1]*xs + x ellipse[,2] <- ellipse[,2]*ys + y if(show.points) points(x,y,pch=19,col=col.smooth,cex=1.5 ) #draw the mean lines(ellipse, ...) } } "panel.ellipse" <- function (x, y, pch = par("pch"), col.smooth = "red", ...) { segments=51 usr <- par("usr"); on.exit(par("usr")) par(usr = c(usr[1]-abs(.05*usr[1]) ,usr[2]+ abs(.05*usr[2]) , 0, 1.5) ) xm <- mean(x,na.rm=TRUE) ym <- mean(y,na.rm=TRUE) xs <- sd(x,na.rm=TRUE) ys <- sd(y,na.rm=TRUE) r = cor(x, y,use="pairwise",method=method) if(jiggle) { x <- jitter(x,factor=factor) y <- jitter(y,factor=factor)} if(smoother) {smoothScatter(x,y,add=TRUE, nrpoints=0)} else {if(show.points) {points(x, y, pch = pch, ...)}} angles <- (0:segments) * 2 * pi/segments unit.circle <- cbind(cos(angles), sin(angles)) if(!is.na(r)) { if (abs(r)>0 ) theta <- sign(r)/sqrt(2) else theta=1/sqrt(2) shape <- diag(c(sqrt(1+r),sqrt(1-r))) %*% matrix(c(theta,theta,-theta,theta),ncol=2,byrow=TRUE) ellipse <- unit.circle %*% shape ellipse[,1] <- ellipse[,1]*xs + xm ellipse[,2] <- ellipse[,2]*ys + ym points(xm,ym,pch=19,col=col.smooth,cex=1.5 ) #draw the mean if(ellipses) lines(ellipse, ...) } } ####### #Beginning of the main function ###### #The original organization was very clunky, but has finally been cleaned up with lots of extra comments removed (8/13/17) old.par <- par(no.readonly = TRUE) # save default, for resetting... on.exit(par(old.par)) #and when we quit the function, restore to original values if(missing(cex.cor)) cex.cor <- 1 #this allows us to scale the points separately from the correlations for(i in 1:ncol(x)) { #treat character data as numeric if(is.character(x[[i]] )) { x[[i]] <- as.numeric(as.factor(x[[i]]) ) colnames(x)[i] <- paste(colnames(x)[i],"*",sep="")} } n.obs <- nrow(x) confid <- qt(1-alpha/2,n.obs-2) #used in finding confidence intervals for regressions and loess if(!lm) { #the basic default is here if(cor) { pairs(x, diag.panel = panel.hist.density, upper.panel = panel.cor , lower.panel = panel.smoother, pch=pch, ...)} else { pairs(x, diag.panel = panel.hist.density, upper.panel = panel.smoother, lower.panel = panel.smoother, pch=pch, ...)} } else { #lm is TRUE if(!cor) { #this case does not show the correlations, but rather shows the regression lines above and below the diagonal pairs(x, diag.panel = panel.hist.density, upper.panel = panel.lm, lower.panel = panel.lm, pch=pch, ...) } else { #the normal case is to show the regressions below and the rs above pairs(x, diag.panel = panel.hist.density, upper.panel = panel.cor, lower.panel = panel.lm,pch=pch, ...) } } } #end of pairs.panels ### "histo" <- function(x,breaks="Sturges", ...) { tax <- table(x) if(length(tax) < 11) {breaks <- as.numeric(names(tax)) y <- tax/max(tax) interbreak <- min(diff(breaks))*(length(tax)-1)/21 rect(breaks-interbreak,0,breaks + interbreak,y) } else { h <- hist(x,breaks=breaks) }}psych/R/mat.regress.R0000644000176200001440000000165214526765427014204 0ustar liggesusers"mat.regress" <- function(y,x,data,z=NULL,n.obs=NULL,use="pairwise",square=FALSE) { #a function to extract subsets of variables (a and b) from a correlation matrix m or data set m #and find the multiple correlation beta weights + R2 of the a set predicting the b set #seriously rewritten, March 24, 2009 to make much simpler #minor additons, October, 20, 2009 to allow for print and summary function #major addition in April, 2011 to allow for set correlation message("mat.regress has been replaced by lmCor, please change your call") lmCor(y,x,data,z=NULL,n.obs=NULL,use="pairwise",square=FALSE)} #modified July 12,2007 to allow for NA in the overall matrix #modified July 9, 2008 to give statistical tests #modified yet again August 15 , 2008 to convert covariances to correlations #modified January 3, 2011 to work in the case of a single predictor #modified April 25, 2011 to add the set correlation (from Cohen) psych/R/sim.hierarchical.R0000644000176200001440000001325414473121651015142 0ustar liggesusers# A function to create a correlation matrix with a hierarchical structure # The default values match those of Jensen and Weng # dropped the call to mvrnorm Nov 28, 2014 # and added back the mu parameter in Sept,2016 (thanks to Alan Robinson for noticing this) # "sim.hierarchical" <- # function (gload=NULL,fload=NULL,n=0,raw=FALSE,mu = NULL) { # cl <- match.call() # if(is.null(gload)) gload=matrix(c(.9,.8,.7),nrow=3) # if(is.null(fload)) {fload <-matrix(c(.8,.7,.6,rep(0,9),.7,.6,.5,rep(0,9),.6,.5,.4), ncol=3)} # # fcor <- gload %*% t(gload) #the factor correlation matrix # diag(fcor) <-1 #put ones on the diagonal # model <- fload %*% fcor %*% t(fload) #the model correlation matrix for oblique factors # diag(model)<- 1 # put ones along the diagonal # nvar <- dim(fload)[1] # colnames(model) <- rownames(model) <- paste("V",1:nvar,sep="") # if(n>0) { # if(is.null(mu)) mu <- rep(0,nvar) # observed <- mvrnorm(n = n, mu, Sigma=model, tol = 1e-6, empirical = FALSE) # the next 3 lines replaces mvrnorm (adapted from mvrnorm, but without the checks) # eX <- eigen(model) # observed <-matrix(rnorm(nvar * n),n) # observed <- t( eX$vectors %*% diag(sqrt(pmax(eX$values, 0)), nvar) %*% t(observed)) # observed <- t(t(observed) + mu) # colnames(observed) <- paste("V",1:nvar,sep="") # r <- cor(observed) # if(!raw) { # result <- list(model=model, r= r, N=n,Call=cl) } else { # result <- list(model=model, r= r,observed=observed, N=n,Call=cl)} # class(result) <- c("psych","sim") # return(result) # } else {return( model) } # # } #An alternative model is to simulate independent factors with cross loadings #simulate the Thomson Bond'd model to produce a positive manifold without a g factor #Developed Sept 14, 2019 "sim.bonds" <- function(nvar=9,nf=NULL,loads=c(0,0,.5,.6),validity=.8) { if(is.null(nf)) nf <- length(loads) f <- matrix(0,nrow=(nvar+ nf),ncol=nf) for (i in 1:nvar) { f[i,] <- sample(loads,nf) } k <- 1 for(i in (nvar+1):NROW(f)) { f[i,k] <- validity k <- k+1 } colnames(f) <- paste0("F",1:nf) rownames(f) <- paste0("V",1:NROW(f)) #fill them all in first rownames(f)[(nvar+1) : (nvar+nf)] <- paste0("F",1:nf) R <- f %*% t(f) diag(R) <- 1 return(list(R=R,model=f) ) } # A function to create a correlation matrix with a hierarchical structure # The default values match those of Jensen and Weng #completely rewritten, March 15, 2021 to add in the ability to report factor sccores #modified May 16 to be able to return "items" rather than continuous scales "sim.hierarchical" <- function (gload=NULL,fload=NULL,n=0, raw=TRUE, mu = NULL,categorical=FALSE, low=-3,high=3,threshold=NULL) { cl <- match.call() #first, prepare the jensen defaults if(is.null(gload)) gload=matrix(c(.9,.8,.7),nrow=3) if(is.null(fload)) fload <-matrix(c(.8,.7,.6,rep(0,9),.7,.6,.5,rep(0,9),.6,.5,.4), ncol=3) if(is.null(colnames(fload))) cn <- paste0("F",1:NCOL(fload)) if(is.null(rownames(fload))) rn <- paste0("V",1:NROW(fload)) nfact <- NCOL(fload) fcor <- gload %*% t(gload) #the factor correlation matrix diag(fcor) <- 1 #put ones on the diagonal of the factor correlations model <- fload %*% fcor %*% t(fload) #the model correlation matrix for oblique factors U <- diag(sqrt(1-(diag(model)))) diag(model)<- 1 # put ones along the diagonal Ig <- diag(drop(gload)) primeload <- fload %*% Ig #the general factor loadings on each group factor fstar <- sqrt(fload^2 - primeload^2) #The Schmid Leiman loadings (fload with g removed) gstar <- fload %*% gload #combine them gfstar <- cbind(gstar,fstar) #the factor model with a g factor and orthogonal sl factors #house keeping -- put i some nice labels colnames(gfstar) <- c("g",paste0("F",1:nfact,"*")) #label the lower level as F* to match omega nvar <- nrow(fload) colnames(model) <- rownames(model)<- paste0("V",1:nvar) n.groups <- NCOL(gfstar ) if(!is.null(threshold)) {if (length(threshold) < nvar) threshold <- sample(threshold, nvar, replace=TRUE)} if(n>0) { #make up the data if(is.null(mu)) mu <- rep(0,n.groups) #don't forget to add this in later g <- t(t(matrix(rnorm(n *(n.groups ) ),ncol=n.groups))+mu) #the factor scores+ factor means colnames(g)<- c("g",paste0("F",1:nfact,"*")) #now generate the lower order variables theta <- t(gfstar %*% t(g)) #g are the orthogonal factor scores, theta are the noise free observed error <- matrix(rnorm(n*nvar),ncol=nvar) observed <- theta + error %*% (U) #weight the error by uniqueness of the variables if(categorical) {#convert from continuous to categorical if(is.null(threshold)) { observed = round(observed) #round all items to nearest integer value observed[(observed <= low)] <- low observed[(observed > high) ] <- high } else { i <- 1:nvar observed <- t(t(observed [,i]) > threshold[i]) + 0 } } colnames(observed) <- paste0("V",1:nvar) r <- cor(observed) colnames(r) <- rownames(model)<- paste0("V",1:nvar) if(!raw) { result <- list(model=model, r= r, N=n,Call=cl) } else { result <- list(model=model, r= r,observed=observed,theta=g, sl=gfstar, N=n,Call=cl)} #include the sl solution class(result) <- c("psych","sim") return(result) } else {return( model) } #just the modeled matrix } psych/R/outlier.R0000644000176200001440000000173112365465461013425 0ustar liggesusers#created 27/7/14 "outlier" <- function(x,plot=TRUE,bad=5,na.rm=TRUE,xlab,ylab,...) { if(missing(xlab)) xlab <- expression("Quantiles of " * ~chi ^2) if(missing(ylab)) ylab <- expression("Mahalanobis " * D^2) rn <- rownames(x) nvar <- ncol(x) n.obs <- nrow(x) if(!is.matrix(x)) x <- as.matrix(x) nvar <- ncol(x) Sx <- cov(x,use="pairwise") Sx.inv <- solve(Sx) # Mx <- colMeans(x,na.rm=na.rm) # x <- sweep(x,2,Mx) #x <- t(scale(t(x),scale=FALSE)) x <- scale(x,scale=FALSE) D2 <- t(apply(x,1,function(xx) colSums(xx * Sx.inv,na.rm=TRUE))) D2 <- rowSums(D2*x,na.rm=TRUE) names(D2) <- rn if(plot) { Chi2 <- qchisq(ppoints(n.obs), df = nvar) qqplot(Chi2, D2, main = expression("Q-Q plot of Mahalanobis" * ~D^2 * " vs. quantiles of" * ~ chi[nvar]^2),xlab=xlab,ylab=ylab,...) abline(0, 1, col = 'gray') worst <- order(D2,decreasing=TRUE) text(Chi2[n.obs:(n.obs-bad+1)],D2[worst[1:bad]],names(D2)[worst[1:bad]],pos=3,...) } return(D2) } psych/R/VSS.R0000644000176200001440000002440114374010170012375 0ustar liggesusers#vss is just an alias to VSS to be consistent with naming conventions #added the RMSEA, BIC, SABIC and complexity criteria 1/27/14 "VSS" <- function (x,n=8,rotate="varimax",diagonal=FALSE,fm="minres",n.obs=NULL,plot=TRUE,title="Very Simple Structure",use="pairwise",cor="cor",...) #apply the Very Simple Structure Criterion for up to n factors on data set x {vss(x=x,n=n,rotate=rotate,diagonal=diagonal,fm=fm,n.obs=n.obs,plot=plot,title=title,use=use,cor=cor,...) } "vss" <- function (x,n=8,rotate="varimax",diagonal=FALSE,fm="minres",n.obs=NULL,plot=TRUE,title="Very Simple Structure",use="pairwise",cor="cor",...) #apply the Very Simple Structure Criterion for up to n factors on data set x #x is a data matrix #n is the maximum number of factors to extract (default is 8) #rotate is a string "none" or "varimax" for type of rotation (default is "varimax" #diagonal is a boolean value for whether or not we should count the diagonal (default=FALSE) # ... other parameters for factanal may be passed as well #e.g., to do VSS on a covariance/correlation matrix with up to 8 factors and 3000 cases: #VSS(covmat=msqcovar,n=8,rotate="none",n.obs=3000) { cl <- match.call() if (rotate=="oblimin") {if(!requireNamespace('GPArotation')) {stop("You must have GPArotation installed to use oblimin rotation")}} old_rotate=rotate #used to remember which rotation to use #start Function definition #first some preliminary functions #complexrow sweeps out all except the c largest loadings #complexmat applies complexrow to the loading matrix complexrow <- function(x,c) #sweep out all except c loadings { n=length(x) #how many columns in this row? temp <- x #make a temporary copy of the row x <- rep(0,n) #zero out x for (j in 1:c) { locmax <- which.max(abs(temp)) #where is the maximum (absolute) value x[locmax] <- sign(temp[locmax])*max(abs(temp)) #store it in x temp[locmax] <- 0 #remove this value from the temp copy } return(x) #return the simplified (of complexity c) row } complexmat <- function(x,c) #do it for every row (could tapply somehow?) { nrows <- dim(x)[1] ncols <- dim(x)[2] for (i in 1:nrows) {x[i,] <- complexrow(x[i,],c)} #simplify each row of the loading matrix return(x) } map <- function(x,n) { nvar <- dim(x)[2] min.partial <- rep(NA,n) e <- eigen(x) evect <- e$vectors comp <- evect %*% diag(sqrt(e$values)) if( n >= nvar) {n1 <- nvar -1} else {n1 <- n} for (i in 1:n1) { c11.star <- x - comp[,1:i] %*% t(comp[,1:i]) d <- diag(1/sqrt(diag(c11.star))) rstar <- d %*% c11.star %*% d diag(rstar) <- 0 min.partial[i] <- sum(rstar * rstar) /(nvar*(nvar-1)) } return(min.partial) } if(dim(x)[2] < n) n <- dim(x)[2] #now do the main Very Simple Structure routine complexfit <- array(0,dim=c(n,n)) #store these separately for complex fits complexresid <- array(0,dim=c(n,n)) vss.df <- data.frame(dof=rep(0,n),chisq=NA,prob=NA,sqresid=NA,fit=NA,RMSEA=NA,BIC=NA,SABIC=NA,complex=NA,eChisq=NA,SRMR=NA,eCRMS=NA,eBIC=NA) #keep the basic results here if (dim(x)[1]!=dim(x)[2]) { n.obs <- dim(x)[1] switch(cor, cor = {x <- cor(x,use=use)}, cov = {x <- cov(x,use=use) covar <- TRUE}, tet = {x <- tetrachoric(x)$rho}, tetrachoric = {x <- tetrachoric(x)$rho}, poly = {x <- polychoric(x)$rho}, polychoric = {x <- polychoric(x)$rho}, mixed = {x <- mixedCor(x,use=use)$rho}, Yuleb = {x <- YuleCor(x,,bonett=TRUE)$rho}, YuleQ = {x <- YuleCor(x,1)$rho}, YuleY = {x <- YuleCor(x,.5)$rho } ) # x <- cor(x,use="pairwise") The case statement allows many types of correlations } else {if(!is.matrix(x)) x <- as.matrix(x)} # if given a rectangular if(is.null(n.obs)) {message("n.obs was not specified and was arbitrarily set to 1000. This only affects the chi square values.") n.obs <- 1000} map.values <- map(x,n) if (n > dim(x)[2]) {n <- dim(x)[2]} #in cases where there are very few variables for (i in 1:n) #loop through 1 to the number of factors requested { PHI <- diag(i) if(i<2) {(rotate="none")} else {rotate=old_rotate} if(!(fm=="pc")) { f <- fa(x,i,rotate=rotate,n.obs=n.obs,warnings=FALSE,fm=fm,scores="none",cor=cor,...) #do a factor analysis with i factors and the rotations specified in the VSS call if (i==1) {original <- x #just find this stuff once sqoriginal <- original*original #squared correlations totaloriginal <- sum(sqoriginal) - diagonal*sum(diag(sqoriginal) ) #sum of squared correlations - the diagonal }} else {f <- principal(x,i) if (i==1) {original <- x #the input to pc is a correlation matrix, so we don't need to find it again sqoriginal <- original*original #squared correlations totaloriginal <- sum(sqoriginal) - diagonal*sum(diag(sqoriginal) ) #sum of squared correlations - the diagonal } if((rotate=="varimax") & (i>1)) {f <- varimax(f$loadings) PHI <- diag(i)} else { if(((rotate=="promax")| (rotate=="Promax") )& (i>1)) {f <- Promax(f$loadings) PHI <- f$Phi} else { if((rotate=="oblimin")& (i>1)) {f <- GPArotation::oblimin(f$loadings) U <- f$Th phi <- t(U) %*% U PHI <- cov2cor(phi) }} }} load <- as.matrix(f$loadings ) #the loading matrix model <- load %*% PHI %*% t(load) #reproduce the correlation matrix by the factor law R= FF' residual <- original-model #find the residual R* = R - FF' sqresid <- residual*residual #square the residuals totalresid <- sum(sqresid)- diagonal * sum(diag(sqresid) ) #sum squared residuals - the main diagonal fit <- 1-totalresid/totaloriginal #fit is 1-sumsquared residuals/sumsquared original (of off diagonal elements if ((fm !="pc")) { vss.df[i,"dof"] <- f$dof #degrees of freedom from the factor analysis vss.df[i,"chisq"] <- f$STATISTIC #chi square from the factor analysis vss.df[i,"prob"] <- f$PVAL #probability value of this complete solution\ vss.df[i,"eChisq"] <- f$chi vss.df[i,"SRMR"] <- f$rms vss.df[i,"eRMS"] <- f$rms vss.df[i,"eCRMS"] <- f$crms vss.df[i,"eBIC"] <- f$EBIC if(!is.null(f$RMSEA)) {vss.df[i,"RMSEA"] <- f$RMSEA[1]} else {vss.df[i,"RMSEA"] <- NA} if(!is.null(f$BIC)) {vss.df[i,"BIC"] <- f$BIC} else {vss.df[i,"BIC"] <- NA} if(!is.null(f$SABIC)) {vss.df[i,"SABIC"] <- f$SABIC} else {vss.df[i,"SABIC"] <- NA} if(!is.null(f$complexity)) {vss.df[i,"complex"] <- mean(f$complexity)} else {vss.df[i,"complex"] <- NA} } vss.df[i,"sqresid"] <- totalresid #residual given complete model vss.df[i,"fit"] <- fit #fit of complete model #now do complexities -- how many factors account for each item for (c in 1:i) { simpleload <- complexmat(load,c) #find the simple structure version of the loadings for complexity c model <- simpleload %*% PHI %*% t(simpleload) #the model is now a simple structure version R ? SS' residual <- original- model #R* = R - SS' sqresid <- residual*residual totalsimple <- sum(sqresid) -diagonal * sum(diag(sqresid)) #default is to not count the diagonal simplefit <- 1-totalsimple/totaloriginal complexresid[i,c] <-totalsimple complexfit[i,c] <- simplefit } } #end of i loop for number of factors vss.stats <- data.frame(vss.df,cfit=complexfit,cresidual=complexresid) if (plot) VSS.plot(vss.stats,title=title) vss.results <- list(title=title,map=map.values,cfit.1=complexfit[,1],cfit.2= complexfit[,2],vss.stats=vss.stats,call=cl) class(vss.results) <- c("psych" ,"vss") return(vss.results) } #end of VSS function "nfactors" <- function(x,n=20,rotate="varimax",diagonal=FALSE,fm="minres",n.obs=NULL,title="Number of Factors",pch=16,use="pairwise",cor="cor",...) { vs <- vss(x=x,n=n,rotate=rotate,diagonal=diagonal,fm=fm,n.obs=n.obs,plot=FALSE,title=title,use=use,cor=cor,...) old.par <- par(no.readonly = TRUE) # save default, for resetting... on.exit(par(old.par)) #and when we quit the function, restore to original values op <- par(mfrow=c(2,2)) x <- vs$vss.stats n = dim(x) plot(x$cfit.1, ylim = c(0, 1), typ = "b", ylab = "Very Simple Structure Fit", xlab = "Number of Factors",main="Very Simple Structure",pch=49) lines(x$cfit.1) x$cfit.2[1] <- NA x$cfit.3[1] <- NA x$cfit.3[2] <- NA lines(x$cfit.2) points(x$cfit.2,pch=50) lines(x$cfit.3) points(x$cfit.3,pch=51) plot(vs$vss.stats[,"complex"],xlab="Number of factors",ylab="Complexity",typ="b",main="Complexity",pch=pch,...) plot(vs$vss.stats[,"eBIC"],xlab="Number of factors",ylab="Empirical BIC",typ="b",main="Empirical BIC",pch=pch,...) plot(vs$vss.stats[,"SRMR"],xlab="Number of factors",ylab="SRMR",typ="b",main="Root Mean Residual",pch=pch,...) results <- list(title=title,map=vs$map,vss.stats=vs$vss.stats[,1:16],call=vs$call) class(results) <- c("psych","vss") return(results) } #added 2/17/23 #implementing an idea from an article by Larsen and Warne 2010 "eigenCi" <- function(x,n.iter=1000, use="pairwise", alpha=.05,plot=FALSE,root=FALSE) { evo <- eigen(cor(x,use=use))$values res <- list() n.items <- NROW(x) nvar <- NCOL(x) for(i in 1:n.iter) { ss <- sample(n.items,n.items,replace=TRUE) R <- cor(x[ss,], use=use) ev <- eigen(R)$values res[[i]] <- c(ev) } res.mat <- matrix(unlist(res),ncol=nvar,byrow=TRUE) #sd <- qnorm(alpha/2) * apply(res.mat,2, sd) boot <- apply(res.mat,2,function(x) quantile(x,c(alpha/2,(1-alpha/2)))) norm <- qnorm(alpha/2)* sqrt((2 * evo^2)/n.items) ci <- data.frame(ev=evo,nlow = evo+norm,nhigh=evo-norm, blow=boot[1,],bhigh =boot[2,]) if(plot) { if(root) {ci <- sqrt(ci) matplot(ci,type="l",ylab="sqrt eigen values",main="Confidence intervals of eigen values")} else { matplot(ci,type="l",ylab="eigen values",main="Confidence intervals of eigen values")}} return(ci) } psych/R/matrix.na.R0000644000176200001440000000355014206176745013644 0ustar liggesusersmatrix.na <- function (x, y) { nvar <- ncol(x) n.obs = nrow(x) ny <- ncol(y) if(nvar!= NROW(y)) stop result <- matrix(NA,ncol=ny,nrow=n.obs) for (k in 1:n.obs) { for (i in 1:ny ) { result[k,i] <- mean(x[k,] * y[,i],na.rm=TRUE)} } result } #an attempt to do fast matrixlike operation with missing data\ matrix.na.z <- function(x,y,scale=TRUE) { #first get tranpose x to make multiplication work nvar <- ncol(x) if(nvar != nrow(y) ) stop("matrices are not compatible")#matrices are not compatible if(scale) x <- scale(x) #zero center and standaridize tx <- t(x) #we want to do it on the transposed matrix ny <- ncol(y) result <- matrix(NA,nrow = nrow(x),ncol= ncol(y)) result <- apply(y,2,function(x ) colSums(x * tx,na.rm=TRUE)) return((result)) } "impute.na" <- function(x,impute="mean") { miss <- which(is.na(x),arr.ind=TRUE) if(impute=="mean") { item.means <- colMeans(x,na.rm=TRUE) #replace missing values with means x[miss]<- item.means[miss[,2]]} else { item.med <- apply(items,2,median,na.rm=TRUE) #replace missing with medians x[miss]<- item.med[miss[,2]]} return(x) } score.na <- function ( keys,r,cor=TRUE,smooth=FALSE) {#score a matrix with missing correlations covar <- apply(keys,2,function(x) colSums(apply(keys,2,function(x) colSums(r*x,na.rm=TRUE))*x,na.rm=TRUE)) count <- t(keys) %*% keys #counts the number of items/key bad <- apply(keys,2,function(x) colSums(apply(keys,2,function(x) colSums(is.na(r)*abs(x),na.rm=TRUE))*abs(x),na.rm=TRUE)) #the number missin good <- apply(keys,2,function(x) colSums(apply(keys,2,function(x) colSums(!is.na(r)*abs(x),na.rm=TRUE))*abs(x),na.rm=TRUE)) #the number not missing result <- (covar - count)/(good-count) *(good-count + bad)+ count if(cor) result <- cov2cor(result) if(smooth) result<- cor.smooth(result) return(result) #the covariance matrix use cov2cor } psych/R/describe.by.R0000644000176200001440000000755614551333355014141 0ustar liggesusers#modified March 4, 2009 for matrix output #and yet again August 1 to make it actually work! #modified May 26, 2014 to add the ability to specify group by name or location "describe.by" <- function (x,group=NULL,mat=FALSE,type=3,...) { #data are x, grouping variable is group .Deprecated("describeBy", msg = "describe.by is deprecated. Please use the describeBy function") if(inherits(x,"formula")) {ps <- fparse(x) x <- ps$y group <- ps$x} answer <- describeBy(x=x,group=group,mat=mat,type=type,...) return(answer)} #July 9, 2020 added the ability to do formula input "describeBy" <- function (x,group=NULL,mat=FALSE,type=3,digits=15,data=NULL,...) { #data are x, grouping variable is group cl <- match.call() if(inherits(x,"formula")) {ps <- fparse(x) if(missing(data)) {x <- get(ps$y) group <- x[ps$x]} else {x <- data[ps$y] group <- data[ps$x]} } if((inherits(x[1], "data.frame", which=TRUE ) >1 )) x <- fix.dplyr(x) #to get around a problem created by dplyr if(is.null(group)) {answer <- describe(x,type=type) warning("no grouping variable requested")} else { x <- char2numeric(x,flag=FALSE) #do this before doing by groups 4/16/23 if(!is.data.frame(group) && !is.list(group) && (length(group) < NROW(x))) group <- x[,group ,drop=FALSE] answer <- by(x,group,describe,type=type,...) answer <- fix.is.null(answer) class(answer) <- c("psych","describeBy") #probably better not to make of class psych (at least not yet) } if (mat) { ncol <- length(answer[[1]]) #the more complicated case. How to reorder a list of data.frames #the interesting problem is treating the case of multiple grouping variables. n.var <- NROW(answer[[1]]) n.col <- NCOL(answer[[1]]) n.grouping <- length(dim(answer)) #this is the case of multiple grouping variables n.groups <- prod(dim(answer)) names <- names(answer[[1]]) row.names <-attr(answer[[1]],"row.names") dim.names <- attr(answer,"dimnames") mat.ans <- matrix(NA,ncol=ncol,nrow=n.var*n.groups) #NA or NaN labels.ans <- matrix(NA,ncol = n.grouping+1,nrow= n.var*n.groups) #CHANGE TO NA colnames(labels.ans) <- c("item",paste("group",1:n.grouping,sep="")) colnames(mat.ans) <- colnames(answer[[1]]) rn <- 1:(n.var*n.groups) k <- 1 labels.ans[,1] <- seq(1,(n.var*n.groups)) # for (grouping in 1:n.grouping) { labels.ans[,grouping+1] <- attr(answer,"dimnames")[[grouping]] }#no group.scale <- cumprod(c(1,dim(answer))) for (var in 1:(n.var*n.groups)) { for (group in 1:n.grouping) { groupi <- ((trunc((var-1)/group.scale[group]) ) %% dim(answer)[group] ) +1 labels.ans[var,group+1] <- dim.names[[group]][[groupi]]} } k <- 1 for (var in 1:n.var) { for (group in 1:n.groups) { rn[k] <- paste(row.names[var],group,sep="") #mat.ans[k,1] <- group for (stat in 1:n.col) {if(!is.null(answer[[group]][[stat]][var])) { mat.ans[k,stat] <- round(answer[[group]][[stat]][var],digits)} else { mat.ans[k,stat] <- NA } } k <- k + 1} } answer <- data.frame( labels.ans,mat.ans) rownames(answer) <- rn } #class(answer) <- c("psych","describe","list") #answer$Call <- cl return(answer)} #added 11/05/20 to fix the problem of NULL values for certain combinations #doesn't work for 3 way or more classifications 1/5/20 (reported by Nicholas Stefaniak) #fixed 1/5/20 fix.is.null <- function(x ) { fix <- data.frame(vars=NA,n=NA,mean=NA,sd=NA,median=NA,trimmed=NA,mad=NA,min=NA,max=NA,range=NA,skew=NA,kurtosis=NA,se=NA) fix <- char2numeric(fix) n.obs <- NROW(x) n.var <- NCOL(x) n.cells <- length(x) if(n.cells > n.obs * n.var) { for (cells in (1:n.cells)) if(is.null(x[cells])) {x[cells] <- fix } } else { if(n.var > 1) { for (i in 1:n.obs) { for (j in 1 :n.var) { if(is.null(x[[i,j]])) x[[i,j]] <- fix} } }} return(x) }psych/R/polychoric.R0000644000176200001440000004645414376520416014124 0ustar liggesusers#Faster Polychoric uses tableF (a function to speed up 2 way tables of integers #first, we introduce a new function to find integer tables without error checking #if all data are integer then #tableF is the fast version of table #it does no error checking and works only for two dimensional integer data tableF <- function(x,y) { minx <- min(x,na.rm=TRUE) #these probably could be found just once maxx <- max(x,na.rm=TRUE) miny <- min(y,na.rm=TRUE) maxy <- max(y,na.rm=TRUE) maxxy <- (maxx+(minx==0))*(maxy+(miny==0)) dims=c(maxx + 1 - min(1,minx),maxy+1 - min(1,minx)) bin <- x - minx+ (y-miny)*(dims[1])+ max(1,minx) ans <- matrix(tabulate(bin,maxxy),dims) ans } #perhaps even faster, but more importantly does not drop categories - probably needs to be passed both x and y min and max tableFast <- #revised and preferred, but requires specifying the min and max function(x,y,minx,maxx,miny,maxy) { #y and x can have separate min and max in the case of polydi,normally they are the same maxxy <- (maxx+(minx==0))*(maxy+(minx==0)) bin <- x-minx + (y-minx) *maxx+ 1 dims=c(maxx + 1 - min(1,minx),maxy+1 - min(1,miny)) ans <- matrix(tabulate(bin,maxxy),dims) ans } #adapted from John Fox's Polychor #polyc does all the work but does not work in cases of incomplete tables #thus, the polychor function is used #moved these first two function out of the polyc function in the hope that they will be compiled just once and perhaps get a speed increase #doesn't seem to make a difference although it does make the code a bit easier to read #polychoric.mc is added while we test it versus polychoric # polyBinBvn.old <- function (rho,rc,cc) #adapted from John Fox's polychor # { if (min(rc) < -9999) rc <- rc[-1] # if (min(cc) < - 9999) cc <- cc[-1] # if (max(rc) > 9999) rc <- rc[-length(rc)] # if (max(cc) > 99999) cc <- cc[-length(cc)] # row.cuts <- c(-Inf,rc,Inf) # col.cuts <- c(-Inf,cc,Inf) # nr <- length(rc) + 1 # nc <- length(cc) + 1 # # # P <- matrix(0, nr,nc) # R <- matrix(c(1,rho,rho,1),2,2) # # diag(R) <- 1 # for (i in 1:nr) { # for (j in 1:nc) { # P[i, j] <- pmvnorm(lower = c(row.cuts[i], col.cuts[j]), # upper = c(row.cuts[i + 1], col.cuts[j + 1]), # corr = R) #should we specify the algorithm to TVPACK or Miwa # }} # P #the estimated n x n predicted by rho, rc, cc # } polyBinBvn<- function (rho,rc,cc) { #adapted from John Fox's polychor #but recognizes that we don't need to calculate all cells because of degrees of freedom # if ( min(rc,na.rm=TRUE) < -9999) rc <- rc[-1] # if ( min(cc,na.rm=TRUE) < - 9999) cc <- cc[-1] # if (max(rc,na.rm=TRUE) > 9999) rc <- rc[-length(rc)] # if (max(cc,na.rm=TRUE) > 9999) cc <- cc[-length(cc)] row.cuts <- c(-Inf,rc,Inf) col.cuts <- c(-Inf,cc,Inf) # nr <- length(rc) + 1 # nc <- length(cc) + 1 #replaced with next two lines 9/8/14 nr <- length(row.cuts) -1 nc <- length(col.cuts) -1 P <- matrix(0, nr,nc) R <- matrix(c(1,rho,rho,1),2,2) # diag(R) <- 1 for (i in 1:(nr-1)) { for (j in 1:(nc-1)) { P[i, j] <- mnormt::sadmvn(lower = c(row.cuts[i], col.cuts[j]), upper = c(row.cuts[i + 1], col.cuts[j + 1]), mean=rep(0,2), varcov = R) #should we specify the algorithm to TVPACK or Miwa }} P[1,nc] <- pnorm(rc[1]) - sum(P[1,1:(nc-1)] ) P[nr,1] <- pnorm(cc[1]) - sum(P[1:(nr-1),1] ) if(nr >2) {for (i in (2:(nr-1))) {P[i,nc] <- pnorm(rc[i]) -pnorm(rc[i-1])- sum(P[i,1:(nc-1)] ) }} if(nc >2) {for (j in (2:(nc-1))) {P[nr,j] <- pnorm(cc[j]) - pnorm(cc[j-1])-sum(P[1:(nr-1),j] ) }} if(nc > 1) P[nr,nc] <- 1- pnorm(rc[nr-1]) - sum(P[nr,1:(nc-1)]) P #the estimated n x n predicted by rho, rc, cc } # polyF <- function(rho,rc,cc,tab) { # P <- polyBinBvn(rho, rc, cc) # -sum(tab * log(P)) } #the criterion to be minimized # #revised 16/6/18 to cover the problem of 0 values in cells polyF <- function(rho,rc,cc,tab) { #doesn't blow up in the case of 0 cell entries added 16/6/18 P <- polyBinBvn(rho, rc, cc) P[P <=0] <- NA #added 18/2/9 lP <- log(P) lP[lP == -Inf] <- NA lP[lP == Inf] <- NA -sum(tab * lP,na.rm=TRUE) } #the criterion to be minimized "wtd.table" <- function(x,y,weight) { tab <- tapply(weight,list(x,y),sum,na.rm=TRUE,simplify=TRUE) #taken from questionr:wtd.table tab[is.na(tab)] <- 0 return(tab) } #modified 10/8/14 to create missing values when there are no cell entries #modified 3/6/14 to create missing values when the data are hopeless #modified 06/2/18 for the case of empty cells "polyc" <- #uses the tableFast function instead of tableF function(x,y=NULL,taux,tauy,global=TRUE,weight=NULL,correct=correct,gminx,gmaxx,gminy,gmaxy) { if(is.null(weight )) {tab <- tableFast(x,y,gminx,gmaxx,gminy,gmaxy) } else {tab <- wtd.table(x,y,weight)} #need to specify minx and maxx somehow fixed <- 0 tot <- sum(tab) if(tot ==0) {result <- list(rho=NA,objective=NA,fixed=1) return(result)} #we have no data for this cell 05/02/18 tab <- tab/tot if(global) { rho <- optimize(polyF,interval=c(-1,1),rc=taux, cc=tauy,tab)#this uses the global taux and tauy } else { #use item row and column information for this pair, rather than global values #this seems to match the polycor function #the next five lines are adapted directly from John Fox's polycor function if(!is.na(sum(tab)) ) { #this checks for completely missing data zerorows <- apply(tab, 1, function(x) all(x == 0)) zerocols <- apply(tab, 2, function(x) all(x == 0)) zr <- sum(zerorows) zc <- sum(zerocols) tab <- tab[!zerorows, ,drop=FALSE] tab <- tab[, !zerocols, drop=FALSE] if(correct > 0) {if(any(tab[]==0)) {fixed <- 1 tab[tab==0] <- correct/tot }} #moved from below 16.6.22 which introduced a bug, moved to here 7/24/20 csum <- colSums(tab) rsum <- rowSums(tab) #if(correct > 0) tab[tab==0] <- correct/tot if(min(dim(tab)) < 2) {rho <- list(objective = NA) } else { cc <- qnorm(cumsum(csum)[-length(csum)]) rc <- qnorm(cumsum(rsum)[-length(rsum)]) rho <- optimize(polyF,interval=c(-1,1),rc=rc, cc=cc,tab) } } else { rho <- list(objective = NA, rho= NA)}} if(is.na(rho$objective)) {result <- list(rho=NA,objective=NA,fixed=fixed) } else { result <- list(rho=rho$minimum,objective=rho$objective,fixed=fixed)} return(result) } ########## #We have dropped option to use John Fox's polycor package, so we don't need the options #function(x,smooth=TRUE,global=TRUE,polycor=FALSE,ML = FALSE, std.err = FALSE,weight=NULL,correct=.5,progress=TRUE,na.rm=TRUE,delete=TRUE) { #12/25/19 added the ability to have a y set of variables as well "polychoric" <- function(x,y=NULL,smooth=TRUE,global=TRUE,polycor=FALSE,ML = FALSE, std.err = FALSE,weight=NULL,correct=.5,progress=TRUE,na.rm=TRUE,delete=TRUE,max.cat=8) { #function(x,smooth=TRUE,global=TRUE,polycor=FALSE,weight=NULL,correct=.5,progress=TRUE,na.rm=TRUE,delete=TRUE) { #if(!require(parallel)) {message("polychoric requires the parallel package.")} #declare these next two functions to be local inside of polychoric #The polycor paramater was dropped because it was not being used. But, several programs are using it. if(polycor) message("The polycor option has been removed from the polychoric function in the psych package. Please fix the call.") if(ML) message("The ML option has been removed from the polychoric function in the psych package. Please fix the call.") if(std.err) message("The std.error option has been removed from the polychoric function in the psych package. Please fix the call.") myfun <- function(x,y,i,j,gminx,gmaxx,gminy,gmaxy) {polyc(x[,i],x[,j],tau[,i],tau[,j],global=global,weight=weight,correct=correct,gminx=gminx,gmaxx=gmaxx,gminy=gminy,gmaxy=gmaxy) } myfuny <- function(x,y,i,j,gminx,gmaxx,gminy,gmaxy,tauy) {polyc(x[,i],y[,j],tau[,i],tauy[,j],global=global,weight=weight,correct=correct,gminx=gminx,gmaxx=gmaxx,gminy=gminy,gmaxy=gmaxy) } matpLower <- function(x,nvar,gminx,gmaxx,gminy,gmaxy) { k <- 1 il <- vector() jl <- vector() for(i in 2:nvar) {for (j in 1:(i-1)) { il[k] <- i jl [k] <- j k<- k+1} } poly <- mcmapply(function(i,j) myfun(x,y=NULL,i,j,gminx=gminx,gmaxx=gmaxx,gminy=gminy,gmaxy=gmaxy) , il,jl) #poly <- mapply(function(i,j) myfun(x,y=NULL,i,j,gminx=gminx,gmaxx=gmaxx,gminy=gminy,gmaxy=gmaxy) , il,jl) #debugging, we turn off the mcmapply function and do it by hand # browser() # ppl <- list() # for (i in 2:nvar) {for (j in 1:(i-1)) {ppl[[i+j]] <- myfun(x,i,j,gminx=gminx,gmaxx=gmaxx,gminy=gminy,gmaxy=gmaxy) } } #now make it a matrix mat <- diag(nvar) if(length(dim(poly)) == 2) { mat[upper.tri(mat)] <- as.numeric(poly[1,]) #first row of poly is correlation, 2nd the fit mat <- t(mat) + mat fixed <- as.numeric(poly[3,]) diag(mat) <- 1 fixed <- sum(fixed) if((fixed > 0) && ( correct > 0)) { warning(fixed ," cells were adjusted for 0 values using the correction for continuity. Examine your data carefully.")} return(mat)} else { warning("Something is wrong in polycor ") return(poly) #never actually gets here stop("we need to quit because something was seriously wrong. Please look at the results")} } matpxy <- function(x,y,nvar,nvar.y,gminx,gmaxx,gminy,gmaxy,tauy) { if(!is.matrix(tauy)) tauy <- matrix(tauy,ncol=nvar.y) #make the lists that are passed to myfuny k <- 1 il <- vector() jl <- vector() for(i in 1:nvar) {for (j in 1:(nvar.y)) { il[k] <- i jl [k] <- j k<- k+1} } #Now, we apply the function myfuny and return the polychoric of the x by y #poly <- mapply(function(i,j) myfuny(x,y,i,j,gminx=gminx,gmaxx=gmaxx,gminy=gminy,gmaxy=gmaxy,tauy) , il,jl) poly <- mcmapply(function(i,j) myfuny(x,y,i,j,gminx=gminx,gmaxx=gmaxx,gminy=gminy,gmaxy=gmaxy,tauy) , il,jl) #now make it a matrix mat <- matrix(NA,ncol=nvar,nrow=nvar.y) if(length(dim(poly)) == 2) { mat<- as.numeric(poly[1,]) #first row of poly is correlation, 2nd the fit fixed <- as.numeric(poly[3,]) fixed <- sum(fixed) if((fixed > 0) && ( correct > 0)) { warning(fixed ," cells were adjusted for 0 values using the correction for continuity. Examine your data carefully.")} return(mat)} else { warning("Something is wrong in polycor ") return(poly) #never actually gets here stop("we need to quit because something was seriously wrong. Please look at the results")} } #the main function starts here if(!is.null(weight)) {if(length(weight) !=nrow(x)) {stop("length of the weight vector must match the number of cases")}} cl <- match.call() nvar <- dim(x)[2] nsub <- dim(x)[1] if((prod(dim(x)) == 4) | is.table(x)) {result <- polytab(x,correct=correct) print("You seem to have a table, I will return just one correlation.") } else { #the main function x <- as.matrix(x) if(!is.numeric(x)) {x <- matrix(as.numeric(x),ncol=nvar) message("Converted non-numeric input to numeric")} nvalues <- max(x,na.rm=TRUE) - min(x,na.rm=TRUE) + 1 if(nvalues > max.cat) stop("You have more than", max.cat," categories for your items, polychoric is probably not needed") #first delete any bad cases item.var <- apply(x,2,sd,na.rm=na.rm) bad <- which((item.var <= 0)|is.na(item.var)) if((length(bad) > 0) & delete) { for (baddy in 1:length(bad)) {message( "Item = ",colnames(x)[bad][baddy], " had no variance and was deleted")} x <- x[,-bad] nvar <- nvar - length(bad) } xmin <- apply(x,2,function(x) min(x,na.rm=TRUE)) #if(global) { xmin <- min(xmin)} xmin <- min(xmin) x <- t(t(x) - xmin +1) #all numbers now go from 1 to nvalues gminx <- gminy <- 1 #allow for different minima if minmax is null xmax <- apply(x,2,function(x) max(x,na.rm=TRUE)) #if(global) xmax <- max(xmax) if (min(xmax) != max(xmax)) {global <- FALSE warning("The items do not have an equal number of response alternatives, global set to FALSE.")} xmax <- max(xmax) #don't test for globality xmax gmaxx <- gmaxy <- xmax #check for different maxima xfreq <- apply(x,2,tabulate,nbins=nvalues) n.obs <- colSums(xfreq) xfreq <- t(t(xfreq)/n.obs) tau <- qnorm(apply(xfreq,2,cumsum)[1:(nvalues-1),]) #these are the normal values of the cuts #moved ) to after ] if(!is.matrix(tau)) tau <- matrix(tau,ncol=nvar) #rownames(tau) <- levels(as.factor(x))[1:(nvalues-1)] #doesn't work if one response is missing rownames(tau) <- 1:(nvalues -1) colnames(tau) <- colnames(x) mat <- matrix(0,nvar,nvar) colnames(mat) <- rownames(mat) <- colnames(x) #x <- x - min(x,na.rm=TRUE) +1 #this is essential to get the table function to order the data correctly -- but we have already done it if(is.null(y)) { mat <- matpLower(x,nvar,gminx,gmaxx,gminy,gmaxy) #the local copy has the extra paremeters #do the multicore version if(any(is.na(mat))) {message("some correlations are missing, smoothing turned off") smooth <- FALSE} if(smooth) {mat <- cor.smooth(mat) } colnames(mat) <- rownames(mat) <- colnames(x) tau <- t(tau) tauy<- NULL } else { #process the x * y data ymin <- apply(y,2,function(x) min(x,na.rm=TRUE)) ymin <- min(ymin,na.rm=TRUE) nvar.y <- NCOL(y) y <- t((t(y) - ymin +1)) #all numbers go from 1 to ymax +1 ymax <- apply(y,2,function(x) max(x,na.rm=TRUE)) ymax <- max(ymax,na.rm=TRUE) gminy <- 1 gmaxy <- ymax nvaluesy <- ymax - ymin +1 yfreq <- apply(y,2,tabulate,nbins=nvaluesy) n.obs.y <- colSums(yfreq) yfreq <- t(t(yfreq)/n.obs.y) tauy <- qnorm(apply(yfreq,2,cumsum))[1:(nvalues-1),] if(!is.matrix(tauy)) tauy <- matrix(tauy,ncol=nvar.y) rownames(tauy) <- 1:(nvalues-1) colnames(tauy) <- colnames(y) mat <- matpxy(x,y,nvar,nvar.y,gminx,gmaxx,gminy,gmaxy,tauy) mat <- matrix(mat,ncol=nvar,nrow=nvar.y) colnames(mat )<- colnames(x) rownames(mat) <- colnames(y) tauy <- t(tauy) tau <- t(tau) } result <- list(rho = mat,tau = tau,tauy = tauy,n.obs=nsub,Call=cl) class(result) <- c("psych","poly") } return(result) } ##### #use polychor from John Fox to do the same #matches polychor output perfectly if correct=FALSE #polytab is for just one pair of variables "polytab" <- function(tab,correct=TRUE) { tot <- sum(tab) tab <- tab/tot if(correct > 0) tab[tab==0] <- correct/tot #use item row and column information for this pair, rather than global values csum <- colSums(tab) rsum <- rowSums(tab) cc <- qnorm(cumsum(csum[-length(csum)])) rc <- qnorm(cumsum(rsum[-length(rsum)])) rho <- optimize(polyF,interval=c(-1,1),rc=rc, cc=cc,tab) result <- list(rho=rho$minimum,objective=rho$objective,tau.row=rc,tau.col =cc) return(result) } ########################################################################################################## #9/6/14 to facilitate mixed cor we find polytomous by dichotomous correlations #4/08/17 fixed to not do table(p) or table(d) #has a problem if we are correcting 0 values "polydi" <- function(p,d,taup,taud,global=TRUE,ML = FALSE, std.err = FALSE,weight=NULL,progress=TRUE,na.rm=TRUE,delete=TRUE,correct=.5) { #if(!require(parallel)) {message("polychoric requires the parallel package.")} #declare these next two functions to be local inside of polychoric myfun <- function(x,i,j,correct,taup,taud,gminx,gmaxx,gminy,gmaxy,np) {polyc(x[,i],x[,j],taup[,i],taud[1,(j-np)],global=global,weight=weight,correct=correct,gminx=gminx,gmaxx=gmaxx,gminy=gminy,gmaxy=gmaxy) } #global changed to true 16/6/19 and set back again to global=global on 09/07/17 matpLower <- function(x,np,nd,taup,taud,gminx,gmaxx,gminy,gmaxy) { k <- 1 il <- vector() jl <- vector() for(i in 1:np) {for (j in 1:nd) { il[k] <- i jl [k] <- j k <- k+1} } poly <- mcmapply(function(i,j) myfun(x,i,j,correct=correct,taup=taup,taud=taud,gminx=gminx,gmaxx=gmaxx,gminy=gminy,gmaxy=gmaxy,np=np) , il,jl+np) #the multicore version #poly <- mapply(function(i,j) myfun(x,i,j,correct=correct,taup=taup,taud=taud,gminx=gminx,gmaxx=gmaxx,gminy=gminy,gmaxy=gmaxy,np=np) , il,jl +np) #the normal version for debugging #now make it a matrix mat <- matrix(np,nd) mat <- as.numeric(poly[1,]) #first row of poly is correlation, 2nd the fit return(mat) } #if(!require(mnormt) ) {stop("I am sorry, you must have mnormt installed to use polychoric")} if(!is.null(weight)) {if(length(weight) !=nrow(x)) {stop("length of the weight vector must match the number of cases")}} cl <- match.call() np <- dim(p)[2] nd <- dim(d)[2] if(is.null(np)) np <- 1 if(is.null(nd)) nd <- 1 nsub <- dim(p)[1] p <- as.matrix(p) d <- as.matrix(d) #pt <- table(p) #why do we do this? #nvalues <- length(xt) #find the number of response alternatives nvalues <- max(p,na.rm=TRUE) - min(p,na.rm=TRUE) + 1 #dt <- table(d) dmin <- apply(d,2,function(x) min(x,na.rm=TRUE)) dmax <- apply(d,2,function(x) max(x,na.rm=TRUE)) dvalues <- max(dmax-dmin) if(dvalues !=1) stop("You did not supply a dichotomous variable") if(nvalues > 8) stop("You have more than 8 categories for your items, polychoric is probably not needed") #first delete any bad cases item.var <- apply(p,2,sd,na.rm=na.rm) bad <- which((item.var <= 0)|is.na(item.var)) if((length(bad) > 0) & delete) { for (baddy in 1:length(bad)) {message( "Item = ",colnames(p)[bad][baddy], " had no variance and was deleted")} p <- p[,-bad] np <- np - length(bad) } pmin <- apply(p,2,function(x) min(x,na.rm=TRUE)) #allow for different minima #gminx <- min(pmin) minx <- min(pmin) p <- t(t(p) - pmin +1) #all numbers now go from 1 to nvalues #p <- t(t(p) - gminx +1) #all numbers now go from 1 to nvalues but we should use global minimima #gminy <- min(dmin) miny <- min(dmin) #d <- t(t(d) - gminy +1) d <- t(t(d) - dmin +1) #this allows a separate minimum for each d variable gminx <- gminy <- 1 #set the global minima to 1 pmax <- apply(p,2,function(x) max(x,na.rm=TRUE)) #check for different maxima gmaxx <- max(pmax) if (min(pmax) != max(pmax)) {global <- FALSE warning("The items do not have an equal number of response alternatives, I am setting global to FALSE")} gmaxy <- max(apply(d,2,function(x) max(x,na.rm=TRUE))) #xfreq <- apply(x- xmin + 1,2,tabulate,nbins=nvalues) pfreq <- apply(p,2,tabulate,nbins=nvalues) n.obs <- colSums(pfreq) pfreq <- t(t(pfreq)/n.obs) taup <- as.matrix(qnorm(apply(pfreq,2,cumsum))[1:(nvalues-1),],ncol=ncol(pfreq)) #these are the normal values of the cuts #if(!is.matrix(tau)) tau <- matrix(tau,ncol=nvar) #rownames(taup) <- names(pt)[1:(nvalues-1)] rownames(taup) <- paste(1:(nvalues-1)) colnames(taup) <- colnames(p) dfreq <- apply(d,2,tabulate,nbins=2) if(nd < 2) {n.obsd <- sum(dfreq) } else {n.obsd <- colSums(dfreq) } dfreq <- t(t(dfreq)/n.obsd) taud <- qnorm(apply(dfreq,2,cumsum)) mat <- matrix(0,np,nd) rownames(mat) <- colnames(p) colnames(mat) <- colnames(d) #x <- x - min(x,na.rm=TRUE) +1 #this is essential to get the table function to order the data correctly x <- cbind(p,d) mat <- matpLower(x,np,nd,taup,taud,gminx,gmaxx,gminy,gmaxy) #the local copy has the extra parameters #do the multicore version mat <- matrix(mat,np,nd,byrow=TRUE) rownames(mat) <- colnames(p) colnames(mat) <- colnames(d) taud <- t(taud) result <- list(rho = mat,tau = taud,n.obs=nsub,Call=cl) class(result) <- c("psych","polydi") return(result) } psych/R/missing.cor.R0000644000176200001440000000054012240203033014143 0ustar liggesusers"missing.cor" <- function(x) { nvar <- ncol(x) Mux <- apply(x,2,mean,na.rm=TRUE) Varx <- apply(x,2,var,na.rm=TRUE) X <- scale(x,scale=FALSE) Covx <- diag(Varx,ncol=nvar) N <- t(!is.na(x)) %*% (!is.na(x)) for(i in 2:nvar) { for (j in 1:(i-1)) { Covx[i,j] <- sum(X[i]*X[j],na.rm=TRUE) } } Covx <- Covx/(N-1) }psych/R/VSSem.R0000644000176200001440000001511712262100643012722 0ustar liggesusers"VSSem" <- function (x,n=8,rotate="varimax",diagonal=FALSE,pc="pa",n.obs=NULL,...) #apply the Very Simple Structure Criterion for up to n factors on data set x #find the maximum likelihood goodness of fit criterion #x is a data matrix #n is the maximum number of factors to extract (default is 8) #rotate is a string "none" or "varimax" for type of rotation (default is "none" #diagonal is a boolean value for whether or not we should count the diagonal (default=FALSE) # ... other parameters for factanal may be passed as well #e.g., to do VSS on a covariance/correlation matrix with up to 8 factors and 3000 cases: #VSS(covmat=msqcovar,n=8,rotate="none",n.obs=3000) { #start Function definition #first some preliminary functions #complexrow sweeps out all except the c largest loadings #complexmat applies complexrow to the loading matrix complexrow <- function(x,c) #sweep out all except c loadings { n=length(x) #how many columns in this row? temp <- x #make a temporary copy of the row x <- rep(0,n) #zero out x for (j in 1:c) { locmax <- which.max(abs(temp)) #where is the maximum (absolute) value x[locmax] <- sign(temp[locmax])*max(abs(temp)) #store it in x temp[locmax] <- 0 #remove this value from the temp copy } return(x) #return the simplified (of complexity c) row } complexmat <- function(x,c) #do it for every row (could tapply somehow?) { nrows <- dim(x)[1] ncols <- dim(x)[2] for (i in 1:nrows) {x[i,] <- complexrow(x[i,],c)} #simplify each row of the loading matrix return(x) } #now do the main Very Simple Structure routine complexfit <- array(0,dim=c(n,n)) #store these separately for complex fits complexchi <- array(0,dim=c(n,n)) complexchi2 <- array(0,dim=c(n,n)) complexdof <- array(0,dim=c(n,n)) complexresid <- array(0,dim=c(n,n)) vss.df <- data.frame(dof=rep(0,n),chisq=0,prob=0,sqresid=0,fit=0) #keep the basic results here if (dim(x)[1]!=dim(x)[2]) { n.obs <- dim(x)[1] x <- cor(x,use="pairwise") } else {if(!is.matrix(x)) x <- as.matrix(x)} # if given a rectangular if(is.null(n.obs)) {message("n.obs was not specified and was arbitrarily set to 1000. This only affects the chi square values.") n.obs <- 1000} if (n > dim(x)[2]) {n <- dim(x)[2]} #in cases where there are very few variables n.variables <- dim(x)[2] for (i in 1:n) #loop through 1 to the number of factors requested { if(!(pc=="pc")) { if ( pc=="pa") { f <- fa(x,i,fm="pa",rotate=rotate,n.obs=n.obs,...) #do a factor analysis with i factors and the rotations specified in the VSS call if (i==1) {original <- x #just find this stuff once sqoriginal <- original*original #squared correlations totaloriginal <- sum(sqoriginal) - diagonal*sum(diag(sqoriginal) ) #sum of squared correlations - the diagonal }} else { f <- fa(x,i,fm=pc,rotate=rotate,covmat=x,n.obs=n.obs,...) #do a factor analysis with i factors and the rotations specified in the VSS call if (i==1) {original <- x #just find this stuff once sqoriginal <- original*original #squared correlations totaloriginal <- sum(sqoriginal) - diagonal*sum(diag(sqoriginal) ) #sum of squared correlations - the diagonal }} } else {f <- principal(x,i) if (i==1) {original <- x #the input to pc is a correlation matrix, so we don't need to find it again sqoriginal <- original*original #squared correlations totaloriginal <- sum(sqoriginal) - diagonal*sum(diag(sqoriginal) ) #sum of squared correlations - the diagonal } if((rotate=="varimax") & (i>1)) {f <- varimax(f$loadings)} else { if((rotate=="promax") & (i>1)) {f <- promax(f$loadings)} }} load <- as.matrix(f$loadings ) #the loading matrix model <- load %*% t(load) #reproduce the correlation matrix by the factor law R= FF' residual <- original-model #find the residual R* = R - FF' sqresid <- residual*residual #square the residuals totalresid <- sum(sqresid)- diagonal * sum(diag(sqresid) ) #sum squared residuals - the main diagonal fit <- 1-totalresid/totaloriginal #fit is 1-sumsquared residuals/sumsquared original (of off diagonal elements if ((pc!="pc")) { #factor.pa reports the same statistics as mle, although the fits are not as good vss.df[i,1] <- f$dof #degrees of freedom from the factor analysis vss.df[i,2] <- f$STATISTIC #chi square from the factor analysis vss.df[i,3] <- f$PVAL #probability value of this complete solution } vss.df[i,4] <- totalresid #residual given complete model vss.df[i,5] <- fit #fit of complete model #now do complexities -- how many factors account for each item for (c in 1:i) { simpleload <- complexmat(load,c) #find the simple structure version of the loadings for complexity c model <- simpleload%*%t(simpleload) #the model is now a simple structure version R ? SS' residual <- original- model #R* = R - SS' sqresid <- residual*residual totalsimple <- sum(sqresid) -diagonal * sum(diag(sqresid)) #default is to not count the diagonal simplefit <- 1-totalsimple/totaloriginal complexresid[i,c] <-totalsimple complexfit[i,c] <- simplefit #find the chi square value for this level of complexity (see factor.pa for more details on code) diag(model) <- 1 model.inv <- solve(model) nfactors <- i m.inv.r <- model.inv %*% original dof <- n.variables * (n.variables-1)/2 - n.variables * c + (nfactors *(nfactors-1)/2) objective <- sum(diag((m.inv.r))) - log(det(m.inv.r)) -n.variables if (!is.null(n.obs)) {STATISTIC <- objective * (n.obs-1) -(2 * n.variables + 5)/6 -(2*nfactors)/3 if (dof > 0) {PVAL <- pchisq(STATISTIC, dof, lower.tail = FALSE)} else PVAL <- NA} complexchi[i,c] <- STATISTIC complexdof[i,c] <- dof res1 <- residual diag(res1) <- 1 complexchi2[i,c] <- -(n.obs - n.variables/3 -1.8) *log(det(res1)) } } #end of i loop for number of factors vss.stats <- data.frame(vss.df,cfit=complexfit,chisq=complexchi,complexchi2,complexdof,cresidual=complexresid) return(vss.stats) } #end of VSS function psych/R/tenberge.R0000644000176200001440000000074411335353520013524 0ustar liggesusers"tenberge" <- function(r) {n <- dim(r)[2] if(dim(r)[1] > n) {r <- cor(r,use="pairwise")} vt <- sum(r) off <- r diag(off) <- 0 sum.off <- sum(off) sumsq.off <- sum(off^2) lambda1 <- n * sum(off)/((n-1)* vt) lambda2 <- (sum.off+ sqrt(sumsq.off*n/(n-1)))/vt lambda3 <- (sum.off +sqrt(sumsq.off+ sqrt((n * sum(off^4)/(n-1)))))/vt lambda4 <- (sum.off +sqrt(sumsq.off+ sqrt(sum(off^4)+ sqrt((n * sum(off^8)/(n-1))))))/vt return(list(mu0 = lambda1,mu1=lambda2,mu2 = lambda3,mu3=lambda4)) }psych/R/count.pairwise.R0000644000176200001440000002337414474371433014721 0ustar liggesusers#drastically simplified, March 14, 2009 from two loops to 1 matrix operation #modified July 2, 2013 to allow not counting the diagonal #modified 08/31/23 to not use the lower.tri but rather the complete matrix "count.pairwise" <- function (x, y=NULL,diagonal=TRUE) { .Deprecated("pairwiseCount",msg="count.pairwise is deprecated. Please use the pairwiseCount function.") if(is.null(y)) {n <- t(!is.na(x)) %*% (!is.na(x)) } else { n <- t(!is.na(x)) %*% (!is.na(y)) } if(!diagonal) diag(n) <- NA return(n) } pairwiseDescribe <- function(x,y=NULL,diagonal=FALSE,...) { cp <- pairwiseCount(x,y=y,diagonal=diagonal) #cp <- as.vector(cp[lower.tri(cp,diag=diagonal)]) #this overestimates the and also gives the wrong answer if diagonal=TRUE cp <- as.vector(cp) describe(cp,...) } # replaces count.pairwise # slightly improved # "pairwiseCount" <- # function (x, y=NULL,diagonal=TRUE) # { x <- !is.na(x) # if(is.null(y)) {n <- t(x) %*% (x) } else { n <- t(x) %*% (!is.na(y)) } # if(!diagonal & is.null(y)) diag(n) <- NA # return(n) } #improvement using crossprod reduces memory load and speeds it up noticably. #For 255K cases and 953 variables, the timings are 114 seconds versus 193 before #timings seem to vary as square of variables (from 100 to 800 at least) "pairwiseCount" <- function (x, y=NULL,diagonal=TRUE) { x <- !is.na(x) if(is.null(y)) {n <- crossprod(x)} else { n <- crossprod(x,!is.na(y)) } if(!diagonal & is.null(y)) diag(n) <- NA return(n) } #doesn't work yet "pairwiseDelete" <- function(x,cut=0) { if(isCorrelation(x)) {#we have already found correlations, get rid of the least number of NAs possible nvar <- ncol(x) nmissing <-apply(x,2, function(xx) sum(is.na(xx))) max.missing <- max(nmissing,na.rm=TRUE) select <- (nmissing == max.missing) newx <- x[!select,!select] } else { #do a count pairwise and then get rid of the least number <= cut if(ncol(x)!= nrow(x) ) {pc <- pairwiseCount(x)} #OK, it is a matrix of counts nbad <- apply(pc,2, function(xx) sum(xx < cut)) max.bad <- max(nbad) select <- (nbad == max.bad) newx <- pc[!select,!select] } return(newx) } # created July 30, 2018 to impute correlations "pairwiseImpute" <- function(keys,R,fix=FALSE) { cl <- match.call() if(!isCorrelation(R)) {message("Correlations found from raw data") cp <- pairwiseCount(R) R <- cor(R,use="pairwise") } else {cp <- NULL cpij <- NA} n.var <- ncol(R) diag(R) <- NA if(is.list(keys)) {select <- sub("-","",unlist(keys)) select <- select[!duplicated(select)] } else { keys <- keys2list(keys) select <- selectFromKeyslist(colnames(R),keys) select <- select[!duplicated(select)]} R <- R[select,select,drop=FALSE] if(!is.null(cp)) cp <- cp[select,select,drop=FALSE] keynames <- colnames(keys) keys <- make.keys(R,keys) n.keys <- dim(keys)[2] n.items <- dim(keys)[1] abskeys <- abs(keys) keynames <- colnames(keys) num.item <- diag(t(abskeys) %*% abskeys) #how many items in each scale n.keys <- ncol(keys) #brute force av.r <- matrix(NA,n.keys,n.keys) count.r <- matrix(NA,n.keys,n.keys) percent <- matrix(NA,n.keys,n.keys) size <- matrix(NA,n.keys,n.keys) for(i in 1:n.keys) { for( j in 1:n.keys) { selectij <- R[keys[,i] >0,keys[,j] > 0] if(!is.null(cp)) {cpij <- cp[keys[,i] >0,keys[,j] > 0]} av.r[i,j] <- mean(selectij,na.rm=TRUE) size[i,j] <- mean(cpij,na.rm=TRUE) count.r[i,j] <- sum(!is.na(selectij)) percent[i,j] <- count.r[i,j]/(count.r[i,j] + sum(is.na(selectij))) } } colnames(av.r) <- rownames(av.r) <- colnames(keys) colnames(count.r) <- rownames(count.r) <- colnames(keys) colnames(percent) <- rownames(percent) <- colnames(keys) colnames(size) <- rownames(size) <- colnames(keys) if(is.null(cp)) size <- NULL if(fix) { for(i in 1:n.keys) { for( j in 1:n.keys) { temp <- which(is.na( R[keys[,i] > 0,keys[,j]>0])) R[keys[,i] > 0,keys[,j]>0][temp] <- av.r[i,j] } } diag(R) <- 1 result <- list(av.r =av.r,count=count.r,percent=percent,imputed=R,Call=cl) } else { result <- list(av.r =av.r,count=count.r,percent=percent,size=size,Call=cl) } class(result) <- c("psych","pairwise") return(result) } #modified July 20th to give more useful summaries "pairwiseReport" <- function(x,y=NULL,cut=0,diagonal=FALSE,...) { cl <- match.call() if(isCorrelation(x)) {#we have already found correlations, flag those that are missing report <- which(is.na(x),arr.ind=TRUE) } else { #do a count pairwise and then get report those below <= cut if(NCOL(x)!=NROW(x)) {pc <- pairwiseCount(x,y)} else {pc <- x} if(is.null(y)) {cp <- as.vector(pc[lower.tri(pc,diag=diagonal)]) des <- describe(cp,...)} else {des <- NULL} report <- which((pc <= cut),arr.ind=TRUE) } if(length(report >0)) {if(is.null(y)) report <- report[report[,1] > report[,2],,drop=FALSE] N <- rep(0,dim(report)[1]) for (i in 1:dim(report)[1] ){N[i] <- pc[report[i,1],report[i,2]]} report <- cbind(report,N) if(is.null(y) ) { df <- data.frame(rows=colnames(x)[report[,1]],cols=colnames(x)[report[,2]],N=N) } else { #although this can be pretty big df <- data.frame(rows =colnames(x)[report[,1]],cols =colnames(y)[report[,2]], N=N) } result <- list(description=des, rows=table(df$rows),cols=table(df$cols),cut = cut, df=df, Call=cl)} else { result <- list(description=des,rows=NULL,cols=NULL,cut=cut,df=NULL,Call=cl)} class(result) <- c("psych","pairwiseCounts") return(result) } #Added Feb 1, 2021 to allow smaller samples for faster processing of enormous data sets (e.g. sapa) "pairwiseSample" <- function(x,y=NULL,diagonal=FALSE,size=100,...) { cl <- match.call() n.items <- NCOL(x) size <- min(size,n.items) ss <- sample(n.items,size) #do a count pairwise and then get report those below <= cut if(NCOL(x)!=NROW(x)) {pc <- pairwiseCount(x[ss])} cp <- as.vector(pc[lower.tri(pc,diag=diagonal)]) des <- describe(cp,quant=c(.05,.25,.5,.75,.95)) result <- des # class(result) <- c("psych","pairwiseCounts") return(result) } "pairwisePlot" <- function(x,y=NULL,upper=TRUE,diagonal=TRUE,labels=TRUE,show.legend=TRUE,n.legend=10,colors=FALSE,gr=NULL,minlength=6,xlas=1,ylas=2,main="Relative Frequencies",count=TRUE,...) { if(count){ r <- pairwiseCount(x=x,y=y,diagonal=diagonal)} else {r <- x} if(!upper) r[col (r) < row(r) ] <- NA #blank out the upper diagonal if(!diagonal) r[col(r) == row(r)] <- NA nvar <- NROW(r) nf <- NCOL(r) MAR <- 5 if(is.null(colnames(x))) colnames(r) <- paste0("V",1:nf) if(is.null(rownames(x))) rownames(r) <- paste0("V",1:nvar) if(!labels) {minlength <- NULL max.len <- 1} else { max.len <- min(max(nchar(rownames(r)))/6,minlength)} r <- r/max(r,na.rm=TRUE) zlim <- c(0,1) if(colors) { if(missing(gr)) {gr <- colorRampPalette(c("red","white","blue"))} colramp <- gr(n.legend) } else { colramp <- grey((n.legend:0)/n.legend)} #colramp <- adjustcolor(colramp,alpha.f =alpha) if(nvar != nf) { r <- t(r) } #if(!is.null(select)) {r <- r[select,select] # pval <- pval[select,select] # nvar <- length(select) # } #reverse the order of the columns (if square) ord1 <- seq(nvar,1,-1) #if(nf == nvar) { r <- r[,ord1] #} #reorder the columns to allow image to work #MAR <- 5 par(mar = c(MAR +max.len,MAR+max.len, 4, .5)) line <- NA tick <- TRUE if(show.legend) { #set it up to do two plots layout(matrix(c(1,2),nrow=1),widths=c(.9,.1),heights=c(1,1)) } image(r,col=colramp,axes=FALSE,main=main,zlim=zlim) box() if(labels) { if(!is.null(minlength)) { rownames(r) <- abbreviate(rownames(r),minlength = minlength) colnames(r) <- abbreviate(colnames(r),minlength = minlength) max.len <- max(nchar(rownames(r)))/6} at1 <- (0:(nf-1))/(nf-1) at2 <- (0:(nvar-1)) /(nvar-1) lab1 <- rownames(r) lab2 <- colnames(r) axis(2,at=at2,labels=lab2,las=ylas,...) tick <- FALSE axis(1,at=at1,labels=lab1,las=xlas,line=line,tick=tick,...) } #screen 2 leg <- matrix(seq(from=zlim[1],to=zlim[2],by =(zlim[2] - zlim[1])/n.legend),nrow=1) par(mar=c(MAR,0, 4,3)) image(leg,col=colramp,axes=FALSE,zlim=zlim) at2 <- seq(0,1,1/n.legend) labels =seq(zlim[1],zlim[2],(zlim[2]-zlim[1])/(length(at2)-1)) axis(4,at=at2,labels =labels,las=2,...) #put them back in the logical order ord1 <- seq(nvar,1,-1) r <- r[,ord1 ] invisible(r) } "pairwiseZero" <- function(x,y=NULL, min=0, short=TRUE) { nvar <- NCOL(x) ynull <- is.null(y) if(ynull) y <- x ny <- NCOL(y) count <- 0 loc <- list() rn <- colnames(x) cn <- colnames(y) for(i in 1:(nvar-(ynull))) { for(j in (min(i,ny) + (ynull)):ny) { minxy <- min(tableF(x[,i],y[,j]), na.rm=TRUE) if(minxy <= min) {count <- count + 1 loc[[count]] <- c(rn[i],cn[j])} } } if(count>0) {cat("\n Number of cells with values <= ", min," = ", count,"\n") count.df <- matrix(unlist(loc), ncol=2, byrow=TRUE) colnames(count.df) <- c("row", "column") print(count.df) tables <- list() for (i in 1:count) { cat("\n Item pair ", count.df[i,],"\n") tables[[i]] = tableF(x[,count.df[i,1]],y[,count.df[i,2]] ) if(!short) {# cat("\n" ,count.df[i, ],"\n" ) print(tables[[i]])} } names(tables) <- paste(count.df[,1],count.df[,2]) } else {cat("\n Minimum cell all exceed", min)} result <- list(items=count.df, tables=tables) invisible(result) }psych/R/scatter.hist.R0000644000176200001440000002444514107733514014355 0ustar liggesusers"scatter.hist" <- "scatterHist" <- function(x,y=NULL,smooth=TRUE,ab=FALSE, correl=TRUE,data=NULL, density=TRUE,means=TRUE, ellipse=TRUE,digits=2,method="pearson",cex.cor=1,cex.point=1,title="Scatter plot + density", xlab=NULL,ylab=NULL,smoother=FALSE,nrpoints=0,xlab.hist=NULL,ylab.hist=NULL,grid=FALSE,xlim=NULL,ylim=NULL,x.breaks=11,y.breaks=11, x.space=0,y.space=0,freq=TRUE,x.axes=TRUE,y.axes=TRUE,size=c(1,2),col=c("blue","red","black"),legend=NULL,alpha=.5,pch=21, show.d=TRUE, x.arrow=NULL,y.arrow=NULL,d.arrow=FALSE,cex.arrow=1,...) { old.par <- par(no.readonly = TRUE) # save default sb <- grp <- NULL main <- title if(inherits(x, "formula")) { ps <- fparse(x) formula <- TRUE if(is.null(data)) stop("You must specify the data if you are using formula input") y <- ps$y x <- ps$x if(length(x) > 1) { byGroup <- TRUE freq <- FALSE grp <- x[-1] grp.name <- grp grp <- data[,grp,drop=FALSE] n.grp <- length(table(grp)) x <- x[1] xy <-data[,c(x,y,grp.name),drop=FALSE] sb <- statsBy(xy,group=grp.name,cors=TRUE) } else {grp <- NULL byGroup <- FALSE Md <- NULL } if(is.null(xlab)) xlab <- x if(is.null(ylab)) ylab <- y x <- data[,x,drop=FALSE] y <- data[,y,drop=FALSE] } col <- adjustcolor(col,alpha.f =alpha) n.obs <- sum(!is.na(x)) if(missing(xlab)) { if(!is.null(colnames(x))) {xlab=colnames(x)[1] ylab=colnames(x)[2]} else {xlab="V1" ylab="V2"} } if (is.null(y)) {y <- x[,2] x <- x[,1]} else {if(!is.null(dim(x))) {x <- x[,1,drop=TRUE] # if(!is.null(colnames(y))) ylab <- colnames(y) if(!is.null(dim(y))) {y <- y[,1,drop=TRUE]} } } if(missing(ylab)) { ylab <- colnames(y) } if(is.null(grp)) grp <-1 #if((length(pch) ==1 )&&(pch ==".")) pch <- 45 #this is kludge because we add 1 to pch later if(NROW(grp) > 1) { byGroup <- TRUE dx <- by(x,grp,function(xx) density(xx, adjust=1,na.rm=TRUE)) dy <- by(y,grp,function(xx) density(xx, adjust=1,na.rm=TRUE)) #what does adjust do? I am copying this from my densityBy function x.means <- by(x, grp, function (xx) mean(xx, na.rm=TRUE)) #could clean this up to use statsBy output y.means <- by(y, grp, function(xx) mean(xx, na.rm=TRUE) ) x.sd <- by(x, grp, function (xx) sd(xx, na.rm=TRUE)) y.sd <- by(y, grp, function (xx) sd(xx, na.rm=TRUE)) x.n <-by(x,grp,function(xx) sum(!is.na(xx))) y.n <-by(y,grp,function(xx) sum(!is.na(xx))) x.d <- (x.means[2]-x.means[1])/sqrt(((x.sd[1]^2*(x.n[1]-1))+ x.sd[2]^2* (x.n[2]-1))/(x.n[1] + x.n[2])) #changed to n-1 7/29/21 y.d <- (y.means[2]-y.means[1])/sqrt(((y.sd[1]^2*(y.n[1]-1)+ y.sd[2]^2* (y.n[2]-2)))/(y.n[1] + y.n[2])) R.inv <- solve(sb$rwg) dist <- c(x.d,y.d) Md <- sqrt(t(dist) %*% R.inv %*% dist) if(show.d) {x.arrow=round(x.d,2) y.arrow=round(y.d,2) } grp <- unlist(grp) dx.max <- dy.max <- -9999 for(i in 1:length(dx) ) { dx.max <- max(dx.max, dx[[i]]$y) dy.max <- max(dy.max, dy[[i]]$y)} } else {byGroup <- FALSE x.means <- y.means <-x.d <- y.d<- stats<- NA #give them a value so we have something to report in the results } xrange <- range(x,na.rm=TRUE) yrange <- range(y,na.rm=TRUE) if(missing(xlim)) xlim <- xrange if(missing(ylim)) ylim <- yrange x.breaks <- seq(xlim[1],xlim[2],(xlim[2] - xlim[1])/x.breaks) y.breaks <- seq(ylim[1],ylim[2],(ylim[2] - ylim[1])/y.breaks) xhist <- hist(x,breaks=x.breaks,plot=FALSE) yhist <- hist(y,breaks=y.breaks,plot=FALSE) nf <- layout(matrix(c(2,4,1,3),2,2,byrow=TRUE), c(3,1), c(1,3), TRUE) #locations to plot the scatter plot #first plot is in location 1 #nf <- layout(matrix(c(3,4,6,1, 2,5),2,3,byrow=TRUE),c(3,3,1),c(1,3)) #two x, 1 y # nf <- layout(matrix(c(5,6,9,3,4,8,1, 2,7),3,3,byrow=TRUE),c(3,3,1),c(1,3,3)) #two x, 2 y #layout.show(nf) #the first figure is the plot par(mar=c(5,4,1,1)) #bottom, left, top, right if(smoother) {smoothScatter(x,y,nrpoints=nrpoints,xlim=xlim,ylim=ylim,xlab=xlab,ylab=ylab,...)} else { if((length(pch) ==1 )&&(pch ==".")){plot(x,y,xlim=xlim,ylim=ylim,xlab=xlab,ylab=ylab,bg=col[grp],pch=pch)} else { plot(x,y,xlim=xlim,ylim=ylim,xlab=xlab,ylab=ylab,col=col[grp],bg=col[grp],pch=pch+grp,cex=cex.point)} } if(grid) grid() if(ab) abline(lm(y~x)) if(smooth) { ok <- is.finite(x) & is.finite(y) if (any(ok)) # lines(stats::lowess(x[ok], y[ok], f = span, iter = iter), col = col.smooth, ...) lines(stats::lowess(x[ok],y[ok]),col="red")} if(ellipse) { if(byGroup) { grp.values <- table(grp) #fix to work with multiple (>20 groups) grp.names <- dimnames(grp.values) ngrp <- length(grp.names$grp) for (i in 1:ngrp) {x.grp <- x[grp==grp.names[[1]][[i]]] y.grp <- y[grp==grp.names[[1]][[i]]] #lowx <- x[grp==grp.names[[1]][[1]]] #lowy <- y[grp==grp.names[[1]][[1]]] #highx <- x[grp==grp.names[[1]][[2]]] # highy <- y[grp==grp.names[[1]][[2]]] ellipses(x.grp,y.grp,smooth=FALSE,add=TRUE,n=1,size=size,col=col[i],...) } if(d.arrow) dia.arrow(c(x.means[1],y.means[1]), c(x.means[2],y.means[2]),labels=round(Md,2),both=TRUE,cex=cex.arrow) } else { #do something (but how?) # xy <- cbind(x,y,grp) #temp <- by(xy,grp,function(xx) colMeans(xx,na.rm=TRUE)) #for (i in length(temp) ){ # points(temp[[i]][1:2],pch=25,cex=4) ellipses(x,y,add=TRUE,size=size )} } if(!missing(legend) & byGroup) { #show the legend location <- c("topleft","topright","top","left","right") grp.names <- paste(grp.name,names(table(grp))) n.grp <- length(grp.names) leg.text <- grp.names legend(location[legend],legend=leg.text,col=col[1:n.grp],fill=col[1:n.grp],pch=(pch+1:n.grp), lty=c(1:n.grp)) } #this next figure is the density of the x axis par(mar=c(.75,4,2,1)) #the left and right here should match the left and right from above (the location for the x density) #now, if we have a grouping variable, then plot a barplot for each grp value if(byGroup) { plot(dx[[1]],main="",xlim=xrange,axes=FALSE,ylim =c(0,dx.max)) title(main,...) for (i in 1:length(dx)) { if(freq) {scal <- dx[[i]]$n dx[[i]]$y <- dx[[i]]$y*scal} polygon(dx[[i]] ,col=col[i]) #x.mean <- mean(dx[[i]]$x) #this is based upon the density, not the data x.mean <- x.means[i] y.mean <- mean(dx[[i]]$y[256]) #the median y.mean <- dx[[i]]$y[which.max(dx[[i]]$x > x.means[i])] segments(x.mean,0,x.mean,y.mean) } if(!is.null(x.arrow)) { dia.arrow(c(x.means[1],.2*max(dx[[1]]$y)), c(x.means[2],.2*max(dx[[1]]$y)),labels=x.arrow,both=TRUE,cex=cex.arrow)} } else { if(freq) { mp <- barplot(xhist$counts, axes=x.axes, space=x.space,xlab=xlab.hist)} else { mp <- barplot(xhist$density, axes=x.axes, space=x.space,xlab=xlab.hist)} #xhist <- hist(x,breaks=11,plot=TRUE,freq=FALSE,axes=FALSE,col="grey",main="",ylab="") tryd <- try( d <- density(x,na.rm=TRUE,bw="nrd",adjust=1.2),silent=TRUE) if(!inherits(tryd ,"try-error")) { d$x <- (mp[length(mp)] - mp[1]+1) * (d$x - min(xhist$breaks))/(max(xhist$breaks)-min(xhist$breaks)) if(freq) d$y <- d$y * max(xhist$counts/xhist$density,na.rm=TRUE) if(density) lines(d)} } # title(title,cex=cex.title) #doesn't actually do anything #now show the y axis densities par(mar=c(5,0.5,1,2)) #the location for the y density if(byGroup) { #if(freq) { mp <- barplot(xhist$counts, axes=x.axes, space=x.space,xlab=xlab.hist,plot=TRUE)} else { mp <- barplot(xhist$density, axes=x.axes, space=x.space,xlab=xlab.hist,plot=TRUE)} plot(dy[[1]],main="",axes=FALSE,ylim=yrange,xlim = c(0,dy.max),xlab="Density") for (i in 1:length(dy)) { temp <- dy[[i]]$y #swap the x and y columns dy[[i]]$y <- dy[[i]]$x dy[[i]]$x <- temp if(freq) {scal <- dy[[i]]$n dy[[i]]$y <- dy[[i]]$y*scal} polygon(dy[[i]] ,col=col[i]) x.mean <- y.means[i] # x.mean <- mean(dy[[i]]$y) y.mean <- dy[[i]]$x[which.max(dy[[i]]$y > y.means[i])] #this adjusts for the problem that means of density do not match real means segments(0,x.mean,y.mean,x.mean) } if(!is.null(y.arrow)) { dia.arrow(c(.2*max(dx[[1]]$y),y.means[1]), c(.2*max(dx[[1]]$y), y.means[2]),labels=y.arrow,both=TRUE,cex=cex.arrow)} } else { if(freq) {mp <- barplot(yhist$counts, axes=y.axes, space=y.space, horiz=TRUE,ylab=ylab.hist) } else {mp <- barplot(yhist$density, axes=y.axes, space=y.space, horiz=TRUE,ylab=ylab.hist)} tryd <- try( d <- density(y,na.rm=TRUE,bw="nrd",adjust=1.2),silent=TRUE) if(!inherits(tryd,"try-error")) { temp <- d$y d$y <- (mp[length(mp)] - mp[1]+1) * (d$x - min(yhist$breaks))/(max(yhist$breaks)-min(yhist$breaks)) d$x <- temp if(freq) d$x <- d$x * max(yhist$counts/yhist$density,na.rm=TRUE) if(density) lines(d) } } #this is the upper right hand square -- show the correlation/Md par(mar=c(1,1,1,1)) #the location for the correlations if we are not doing groups if(correl) { plot(1,1,type="n",axes=FALSE) #plot(x,y) med.x <- median(x,na.rm=TRUE) med.y <- median(y,na.rm=TRUE) if(missing(method)) method <- "pearson" r = (cor(x, y,use="pairwise",method=method)) # txt <- format(c(r, 0.123456789), digits=digits)[1] txt <- paste0("r = ", round(r,digits),"\n") if(missing(cex.cor)) {cex <- 0.75/strwidth(txt)} else {cex <- cex.cor} text(1,1, txt,cex=cex) if(!is.null(sb))text(1,.8,paste0("r wg =",round(sb$rwg[1,2],2) ),cex=.8*cex)} else { plot(1,1,type="n",axes=FALSE) if(!is.null(Md) ) { txt <- paste0("D = ",sprintf("%.2f",round(Md,2)),"\n") if(missing(cex.cor)) {cex <- 0.75/strwidth(txt)} else {cex <- cex.cor} text(1,1,txt,cex=cex) #text(1,.8,paste0("r wg =",round(sb$rwg[1,2],2) ),cex=cex) } } result <- list(x.means=x.means,y.means=y.means,x.d=x.d, y.d = y.d,stats=sb) par(old.par) invisible(result) } #version of March 7, 2011 #revised Sept 7, 2013 to include method option in cor #revised October 3, 2020 to allow groups and to allow formula input #revised June 6, 2021 to allow ellipses by groups psych/R/ellipses.R0000644000176200001440000000520713373326305013555 0ustar liggesusers"ellipses" <- function(x,y=NULL,add=FALSE,smooth=TRUE, lm=FALSE,data=TRUE,n=2,span=2/3, iter=3,col="red", xlab =NULL,ylab= NULL,size=c(1,2),...) { #based upon John Fox's ellipse functions done=FALSE #this is a kludge! segments=51 if(is.null(size)) size <-c(1,2) if((is.matrix(x)) | (is.data.frame(x))) { if (dim(x)[2] >2 ) { pairs.panels(x) done=TRUE } else { if(is.null(xlab)) xlab=colnames(x)[1] if(is.null(ylab)) ylab=colnames(x)[2] y <- x[,2] x <- x[,1] } #dim ==2 } else { if((!is.vector(x)) | (!is.vector(y))) {stop("x and y must be vectors") } } if(!done){ xm <- mean(x,na.rm=TRUE) ym <- mean(y,na.rm=TRUE) xs <- sd(x,na.rm=TRUE) ys <- sd(y,na.rm=TRUE) r = (cor(x, y,use="pairwise")) if(is.null(xlab)) xlab=colnames(x) if(is.null(ylab)) ylab=colnames(y) angles <- (0:segments) * 2 * pi/segments unit.circle <- cbind(cos(angles), sin(angles)) if (abs(r)>0 )theta <- sign(r)/sqrt(2) else theta=1/sqrt(2) shape <- diag(c(sqrt(1+r),sqrt(1-r))) %*% matrix(c(theta,theta,-theta,theta),ncol=2,byrow=TRUE) ellipse <- unit.circle %*% shape ellipse[,1] <- ellipse[,1]*xs*size[1] + xm ellipse[,2] <- ellipse[,2]*ys*size[1] + ym if (add) { lines(ellipse,col=col, ...) if(data) { points(xm,ym,pch=20,cex=1.5,col=col)} } else {plot(x,y,xlab=xlab,ylab=ylab,...) points(xm,ym,pch=20,cex=1.5,col=col) lines(ellipse, type = "l",col=col,...)} if(smooth) { ok <- is.finite(x) & is.finite(y) if (any(ok)) lines(stats::lowess(x[ok], y[ok], f = span, iter = iter), col = col, ...)} if(lm) { ok <- is.finite(x) & is.finite(y) if (any(ok)) abline(lm(y[ok]~ x[ok]), col = col,lty="dashed", ...)} if(n>1) { ellipse <- unit.circle %*% shape #draw another one ellipse[,1] <- ellipse[,1]*size[2]*xs + xm ellipse[,2] <- ellipse[,2]*size[2]*ys + ym lines(ellipse,col=col, ...) }} } #then, just a fun function to draw Minkowski "circles" "minkowski"<- function(r=2,add=FALSE,main=NULL,xl=1,yl=1) { segments=51 x <- cos((0:segments) * pi/(2* segments)) #this spaces the points out to make a cleaner drawing min.circle <- cbind(x*xl, yl*((1-x^r)/(x^r+(1-x^r)))^(1/r)) if(add) {points(min.circle,type="l") } else plot(min.circle,ylim=c(-1,1),xlim=c(-1,1),typ="l", xlab="",ylab="",main=main) points(-min.circle,typ="l") points(-min.circle[,1],min.circle[,2],typ="l") points(min.circle[,1],-min.circle[,2],typ="l") } psych/R/print.factor.pa.R0000644000176200001440000000604614444406516014752 0ustar liggesusers"print_factor.pa" <- function(x,digits=2,all=FALSE,cutoff=NULL,sort=FALSE,...) { if(is.null(cutoff)) cutoff <- .3 load <- x$loadings nitems <- dim(load)[1] nfactors <- dim(load)[2] loads <- data.frame(item=seq(1:nitems),cluster=rep(0,nitems),unclass(load)) if(sort) { #first sort them into clusters #first find the maximum for each row and assign it to that cluster loads$cluster <- apply(abs(load),1,which.max) ord <- sort(loads$cluster,index.return=TRUE) loads[1:nitems,] <- loads[ord$ix,] rownames(loads)[1:nitems] <- rownames(loads)[ord$ix] #now sort column wise items <- c(table(loads$cluster),1) #how many items are in each cluster? if(length(items) < (nfactors+1)) {items <- rep(0,(nfactors+1)) #this is a rare case where some clusters don't have anything in them for (i in 1:nfactors+1) {items[i] <- sum(loads$cluster==i) } } #now sort the loadings that have their highest loading on each cluster first <- 1 for (i in 1:nfactors) { if(items[i]>0 ) { last <- first + items[i]- 1 ord <- sort(abs(loads[first:last,i+2]),decreasing=TRUE,index.return=TRUE) loads[first:last,] <- loads[ord$ix+first-1,] rownames(loads)[first:last] <- rownames(loads)[ord$ix+first-1] first <- first + items[i] } } } #end of sort #they are now sorted, don't print the small loadings ncol <- dim(loads)[2]-2 fx <- format(loads,digits=digits) nc <- nchar(fx[1,3], type = "c") fx.1 <- fx[,1] fx.2 <- fx[,3:(2+ncol)] load.2 <- loads[,3:(ncol+2)] fx.2[abs(load.2)< cutoff] <- paste(rep(" ", nc), collapse = "") fx <- data.frame(V=fx.1,fx.2) print(fx,quote="FALSE") #adapted from print.loadings vx <- colSums(load.2^2) varex <- rbind("SS loadings" = vx) varex <- rbind(varex, "Proportion Var" = vx/nitems) if (nfactors > 1) varex <- rbind(varex, "Cumulative Var"= cumsum(vx/nitems)) cat("\n") print(round(varex, digits)) if(!is.null(x$phi)) { cat ("\n With factor correlations of \n" ) colnames(x$phi) <- rownames(x$phi) <- colnames(x$loadings) print(round(x$phi,digits))} else { if(!is.null(x$rotmat)) { U <- x$rotmat phi <- t(U) %*% U phi <- cov2cor(phi) cat ("\n With factor correlations of \n" ) colnames(phi) <- rownames(phi) <- colnames(x$loadings) print(round(phi,digits)) } } objective <- x$criteria[1] if(!is.null(objective)) { cat("\nTest of the hypothesis that", nfactors, if (nfactors == 1) "factor is" else "factors are", "sufficient.\n") cat("\nThe degrees of freedom for the model is",x$dof," and the fit was ",round(objective,digits),"\n") if(!is.na(x$n.obs)) {cat("The number of observations was ",x$n.obs, " with Chi Square = ",round(x$STATISTIC,digits), " with prob < ", signif(x$PVAL,digits),"\n")} } } psych/R/phi.R0000644000176200001440000000136012723127523012510 0ustar liggesusers# slight changes to combine phi and phi1 from W. Revelle # Leo Gurtler 07-09-06 (umlaut omitted by CRAN check) #modified 05/18/08 to correct bug in output detected by Dylan Arena "phi" <- function(t,digits=2) { # expects: t is a 2 x 2 matrix or a vector of length(4) stopifnot(prod(dim(t)) == 4 || length(t) == 4) if(is.vector(t)) t <- matrix(t, 2) r.sum <- rowSums(t) c.sum <- colSums(t) total <- sum(r.sum) r.sum <- r.sum/total c.sum <- c.sum/total v <- prod(r.sum, c.sum) phi <- (t[1,1]/total - c.sum[1]*r.sum[1]) /sqrt(v) names(phi) <- NULL return(round(phi,digits)) } #does not return chi square values phi2chi <- function(phi,n.obs) { chi <- phi^2 * (n.obs) p <- 1 - pchisq(chi,1) return(list(chi=chi,p=p))} psych/R/partial.r.R0000644000176200001440000000465514533767711013650 0ustar liggesusers"partial.r" <- function(data,x,y,use="pairwise",method="pearson",part=FALSE) { cl <- match.call() #convert formula input into prior format #x are the set from which y is partialled if(!missing(x)) { if(inherits(x,"formula")) { ps <- fparse(x) y <- ps$y x <- ps$x z <- ps$z #do we have any variable to partial out ex <- ps$ex #now put it into old form x <- c(y,x) yy <- y #we need to keep the name(s) of the dependent variables y <- z #we partial these from x } } if(!isCorrelation(data)) {n.obs <- dim(data)[1] if(!missing(x) & !missing(y)) {if(!is.character(x) ) x <- colnames(data)[x] if(!is.character(y) ) y <- colnames(data)[y] data <- cor(data[,c(x,y)],use=use,method=method) } else {if(is.null(dim(data))) stop("Specify the rows for data (use , for all rows)") data <- cor(data,use=use,method=method) }} m <- as.matrix(data) #if(missing(x) & missing(y)) {X.resid <- -(solve(m)) #this is thus the image covariance matrix if(missing(x) & missing(y)) {X.resid <- -(Pinv(m)) #this is thus the image covariance matrix diag(X.resid) <- 1/(1- smc(m)) #adjust the diagonal to be 1/error X.resid <- cov2cor(X.resid) rownames(X.resid) <-colnames(X.resid) <- colnames(m)} else { if(missing(x)){ x <- colnames(data) y <- as.character(y) x <- x[!x %in% y]} xy <- c(x,y) X <- m[x,x] Y <- m[x,y] phi <- m[y,y] # phi.inv <- solve(phi) phi.inv <- Pinv(phi) X.resid <- X - Y %*% phi.inv %*% t(Y) if(part) if(length(yy) >1) {diag(X.resid[yy,yy]) <- 1} else {X.resid[yy,yy] <- 1} X.resid <- cov2cor(X.resid) class(X.resid) <- c("psych","partial.r", "matrix") } return(X.resid) } #modified March 23 to use cov2cor instead of the sd line. This makes the diagonal exactly 1. #05/08/17 Completely rewritten to be easier to use and follow for the case of complete partials #modified 03/19/19 to just choose the items to correlate instead of entire matrix #modified 07/25/20 to use the Pseudo Inverse so that in cases of improper matrices, we still give a partial #modified 06/09/21 to add the matrix class to the object. #modified 12/3/21 to add formula input option #modified 12/5/23 to add the ability to do part correlations (suggested by Rick Zinbarg) psych/R/factor.model.R0000644000176200001440000000045311400623637014305 0ustar liggesusers"factor.model" <- function(f,Phi=NULL,U2=TRUE) { if(!is.matrix(f)) f <- as.matrix(f) if(is.null(Phi)) {Phi <- diag(1,dim(f)[2])} if(!is.matrix(Phi)) {Phi <- as.matrix(Phi)} if (!U2) diag(Phi) <- 1 result<- f %*% Phi %*% t(f) if (!U2) diag(result) <- 1 return (result)} psych/R/print.psych.fa.R0000644000176200001440000002542714444406553014615 0ustar liggesusers"print_psych.fa" <- function(x,digits=2,all=FALSE,cut=NULL,sort=FALSE,suppress.warnings=TRUE,...) { if(!is.matrix(x) && !is.null(x$fa) && is.list(x$fa)) x <-x$fa #handles the output from fa.poly if(!is.null(x$fn) ) {if(x$fn == "principal") {cat("Principal Components Analysis") } else { cat("Factor Analysis using method = ",x$fm )}} cat("\nCall: ") print(x$Call) load <- x$loadings if(is.null(cut)) cut <- 0 #caving into recommendations to print all loadings #but, if we are print factors of covariance matrices, they might be very small # cut <- min(cut,max(abs(load))/2) #removed following a request by Reinhold Hatzinger nitems <- dim(load)[1] nfactors <- dim(load)[2] if(sum(x$uniqueness) + sum(x$communality) > nitems) {covar <- TRUE} else {covar <- FALSE} loads <- data.frame(item=seq(1:nitems),cluster=rep(0,nitems),unclass(load)) u2.order <- 1:nitems #used if items are sorted if(sort) { #first sort them into clusters #first find the maximum for each row and assign it to that cluster loads$cluster <- apply(abs(load),1,which.max) ord <- sort(loads$cluster,index.return=TRUE) loads[1:nitems,] <- loads[ord$ix,] rownames(loads)[1:nitems] <- rownames(loads)[ord$ix] #now sort column wise #now sort the loadings that have their highest loading on each cluster items <- table(loads$cluster) #how many items are in each cluster? first <- 1 item <- loads$item for (i in 1:length(items)) {# i is the factor number if(items[i] > 0 ) { last <- first + items[i]- 1 ord <- sort(abs(loads[first:last,i+2]),decreasing=TRUE,index.return=TRUE) u2.order[first:last] <- item[ord$ix+first-1] loads[first:last,3:(nfactors+2)] <- load[item[ord$ix+first-1],] loads[first:last,1] <- item[ord$ix+first-1] rownames(loads)[first:last] <- rownames(loads)[ord$ix+first-1] first <- first + items[i] } } } #end of sort #they are now sorted, don't print the small loadings if cut > 0 # if(max(abs(load) > 1.0) && !covar) cat('\n Warning: A Heywood case was detected. \n') ncol <- dim(loads)[2]-2 rloads <- round(loads,digits) fx <- format(rloads,digits=digits) nc <- nchar(fx[1,3], type = "c") fx.1 <- fx[,1,drop=FALSE] #drop = FALSE preserves the rownames for single factors fx.2 <- fx[,3:(2+ncol),drop=FALSE] load.2 <- as.matrix(loads[,3:(ncol+2)]) fx.2[abs(load.2) < cut] <- paste(rep(" ", nc), collapse = "") if(sort) { fx <- data.frame(V=fx.1,fx.2) if(dim(fx)[2] <3) colnames(fx) <- c("V",colnames(x$loadings)) #for the case of one factor } else {fx <- data.frame(fx.2) colnames(fx) <- colnames(x$loadings)} if(nfactors > 1) {if(is.null(x$Phi)) {h2 <- rowSums(load.2^2)} else {h2 <- diag(load.2 %*% x$Phi %*% t(load.2)) }} else {h2 <-load.2^2} if(!is.null(x$uniquenesses)) {u2 <- x$uniquenesses[u2.order]} else {u2 <- (1 - h2)} #h2 <- round(h2,digits) vtotal <- sum(h2 + u2) if(isTRUE(all.equal(vtotal,nitems))) { cat("Standardized loadings (pattern matrix) based upon correlation matrix\n") com <- x$complexity[u2.order] # u2.order added 9/4/14 if(!is.null(com)) { print(cbind(fx,h2,u2,com),quote="FALSE",digits=digits)} else { print(cbind(fx,h2,u2),quote="FALSE",digits=digits) } } else { cat("Unstandardized loadings (pattern matrix) based upon covariance matrix\n") print(cbind(fx,h2,u2,H2=h2/(h2+u2),U2=u2/(h2+u2)),quote="FALSE",digits=digits)} #adapted from print.loadings if(is.null(x$Phi)) {if(nfactors > 1) {vx <- colSums(load.2^2) } else {vx <- sum(load.2^2) }} else {vx <- diag(x$Phi %*% t(load) %*% load) } names(vx) <- colnames(x$loadings) varex <- rbind("SS loadings" = vx) varex <- rbind(varex, "Proportion Var" = vx/vtotal) if (nfactors > 1) { varex <- rbind(varex, "Cumulative Var"= cumsum(vx/vtotal)) varex <- rbind(varex, "Proportion Explained"= vx/sum(vx)) varex <- rbind(varex, "Cumulative Proportion"= cumsum(vx/sum(vx))) } cat("\n") print(round(varex, digits)) #now, if we did covariances show the standardized coefficients as well if(!isTRUE(all.equal(vtotal,nitems))) { #total variance accounted for is not just the number of items in the matrix cat('\n Standardized loadings (pattern matrix)\n') fx <- format(loads,digits=digits) nc <- nchar(fx[1,3], type = "c") fx.1 <- fx[,1,drop=FALSE] #drop = FALSE preserves the rownames for single factors fx.2 <- round(loads[,3:(2+ncol)]/sqrt(h2+u2),digits) load.2 <- loads[,3:(ncol+2)]/sqrt(h2+u2) fx.2[abs(load.2) < cut] <- paste(rep(" ", nc), collapse = "") fx <- data.frame(V=fx.1,fx.2) if(dim(fx)[2] <3) colnames(fx) <- c("V",colnames(x$loadings)) #for the case of one factor if(nfactors > 1) { h2 <-h2/(h2+u2)} else {h2 <-h2/(h2+u2)} u2 <- (1 - h2) print(cbind(fx,h2,u2),quote="FALSE",digits=digits) if(is.null(x$Phi)) {if(nfactors > 1) {vx <- colSums(load.2^2) } else {vx <- diag(t(load) %*% load) vx <- vx*nitems/vtotal }} else {vx <- diag(x$Phi %*% t(load) %*% load) vx <- vx*nitems/vtotal } names(vx) <- colnames(x$loadings) varex <- rbind("SS loadings" = vx) varex <- rbind(varex, "Proportion Var" = vx/nitems) if (nfactors > 1) {varex <- rbind(varex, "Cumulative Var"= cumsum(vx/nitems)) varex <- rbind(varex, "Cum. factor Var"= cumsum(vx/sum(vx)))} cat("\n") print(round(varex, digits)) } if(!is.null(x$Phi)) { if(!is.null(x$fn) ) { if(x$fn == "principal") {cat ("\n With component correlations of \n" ) } else {cat ("\n With factor correlations of \n" )}} colnames(x$Phi) <- rownames(x$Phi) <- colnames(x$loadings) print(round(x$Phi,digits))} else { if(!is.null(x$rotmat)) { U <- x$rotmat ui <- solve(U) Phi <- t(ui) %*% ui Phi <- cov2cor(Phi) if(!is.null(x$fn) ) { if(x$fn == "principal") {cat ("\n With component correlations of \n" ) } else {cat ("\n With factor correlations of \n" )}} colnames(Phi) <- rownames(Phi) <- colnames(x$loadings) print(round(Phi,digits)) } } if(!is.null(x$complexity)) cat("\nMean item complexity = ",round(mean(x$complexity),1)) objective <- x$criteria[1] if(!is.null(objective)) { if(!is.null(x$fn) ) { if(x$fn == "principal") { cat("\nTest of the hypothesis that", nfactors, if (nfactors == 1) "component is" else "components are", "sufficient.\n")} else { cat("\nTest of the hypothesis that", nfactors, if (nfactors == 1) "factor is" else "factors are", "sufficient.\n")}} if(x$fn != "principal") { if(!is.null(x$null.dof)) {cat("\ndf null model = ",x$null.dof, " with the objective function = ",round(x$null.model,digits),...)} if(!is.null(x$null.chisq)) {cat(" with Chi Square = " ,round(x$null.chisq,digits)) } cat("\ndf of the model are",x$dof," and the objective function was ",round(objective,digits),"\n",...) } if(!is.null(x$rms)) {cat("\nThe root mean square of the residuals (RMSR) is ", round(x$rms,digits),"\n") } if(!is.null(x$crms)) {cat("The df corrected root mean square of the residuals is ", round(x$crms,digits),"\n",...) } if((!is.null(x$nh)) && (!is.na(x$nh))) {cat("\nThe harmonic n.obs is " ,round(x$nh)) } if((!is.null(x$chi)) && (!is.na(x$chi))) {cat(" with the empirical chi square ", round(x$chi,digits), " with prob < ", signif(x$EPVAL,digits),"\n" ,...) } if(x$fn != "principal") { if(!is.na(x$n.obs)) {cat("The total n.obs was ",x$n.obs, " with Likelihood Chi Square = ",round(x$STATISTIC,digits), " with prob < ", signif(x$PVAL,digits),"\n",...)} if(!is.null(x$TLI)) cat("\nTucker Lewis Index of factoring reliability = ",round(x$TLI,digits+1))} if(!is.null(x$RMSEA)) {cat("\nRMSEA index = ",round(x$RMSEA[1],digits+1), " and the", (x$RMSEA[4])*100,"% confidence intervals are ",round(x$RMSEA[2:3],digits+1),...) } if(!is.null(x$BIC)) {cat("\nBIC = ",round(x$BIC,digits))} } if(!is.null(x$fit)) cat("\nFit based upon off diagonal values =", round(x$fit.off,digits)) if ((!is.null(x$fn)) && (x$fn != "principal")) { if(!is.null(x$R2)) { stats.df <- t(data.frame(sqrt(x$R2),x$R2,2*x$R2 -1)) rownames(stats.df) <- c("Correlation of (regression) scores with factors ","Multiple R square of scores with factors ","Minimum correlation of possible factor scores ") colnames(stats.df) <- colnames(x$loadings) } else {stats.df <- NULL} badFlag <- FALSE #however, if the solution is degenerate, don't print them if( (is.null(x$R2)) || (any(max(x$R2,na.rm=TRUE) > (1 + .Machine$double.eps)) )) {badFlag <- TRUE if (!suppress.warnings) { cat("\n WARNING, the factor score fit indices suggest that the solution is degenerate. Try a different method of factor extraction.\n") warning("the factor score fit indices suggest that the solution is degenerate\n")} } else { if(!is.null(stats.df)) { cat("\nMeasures of factor score adequacy \n") print(round(stats.df,digits))} #why do we have this next part? It seems redundant # if(is.null(x$method)) x$method <- "" # if(is.null(x$R2.scores)) x$R2.scores <- NA # if(any(is.na(x$R2.scores)) | any(x$R2 != x$R2.scores)) {stats.df <- t(data.frame(sqrt(x$R2.scores),x$R2.scores,2* x$R2.scores -1)) # cat("\n Factor scores estimated using the ", x$method, " method have correlations of \n") # rownames(stats.df) <- c("Correlation of scores with factors ","Multiple R square of scores with factors ","Minimum correlation of possible factor scores ") # # colnames(stats.df) <- colnames(x$loadings) # print(round(stats.df,digits)) # } } } result <- list(Vaccounted=varex) invisible(result) } #end of print.psych.fa #modified November 22, 2010 to get the communalities correct for sorted loadings, but does this work for covariances? #modified November 18, 2012 to print the empirical chi squares #modified October 13, 2013 to add the invisibile return of varex. #Modified the print statements to make the output fit one slide widthpsych/R/congeneric.sim.R0000644000176200001440000000134111121635000014613 0ustar liggesusers"sim.congeneric" <- function(N = 1000, loads = c(0.8, 0.7, 0.6, 0.5), err=NULL, short=TRUE) { n <- length(loads) loading <- matrix(loads, nrow = n) error <- diag(1, nrow = n) if (!is.null(err)) {diag(error) <- err} else { diag(error) <- sqrt(1 - loading^2) } pattern <- cbind(loading, error) colnames(pattern) <- c("theta", paste("e", seq(1:n), sep = "")) rownames(pattern) <- c(paste("V", seq(1:n), sep = "")) model <- pattern %*% t(pattern) latent <- matrix(rnorm(N * (n + 1)), ncol = (n + 1)) observed <- latent %*% t(pattern) colnames(latent) <- c("theta", paste("e", seq(1:n), sep = "")) if (short) {return(model)} else {result <- list(model=model,pattern=pattern,observed=observed,latent=latent) return(result)} }psych/R/factor.residuals.R0000644000176200001440000000030413572761517015207 0ustar liggesusers"factor.residuals" <- function(r, f) { if(is.matrix(f)) { rstar <- r - factor.model(f)} else { Phi <- f$Phi f <- f$loadings rstar <- r - factor.model(f,Phi=Phi)} return(rstar)} psych/R/slipHalf.R0000644000176200001440000000427412246476062013506 0ustar liggesusers#November 30, 2013 #parts adapted from combn "splitHalf"<- function(r,n.sample=10000,raw=FALSE,covar=FALSE) { cl <- match.call() split <- function(o,n) { A <- B <- rep(0,n) A [o] <- B[-o] <- 1 A[-o] <- B[o] <- 0 AB <- cbind(A,B) R <- t(AB) %*% r %*% AB Rab <- R[1,2]/sqrt(R[1,1]*R[2,2]) rab <- 2*Rab/(1+Rab) result <- list(rab=rab,AB=AB)} maxrb <- -9999 minrb <- 2 n <- ncol(r) n2 <- trunc(n/2) n.obs <- nrow(r) if(n.obs > n) r <- cov(r,use="pairwise") if(!covar) r <- cov2cor(r) e <- 0 m <- n2 h <- m alpha <- ((sum(r) - tr(r))/sum(r)) * n/(n-1) sumr <- 0 x <- seq_len(n) a <- seq_len(m) if(is.null(n.sample) ) { count <- as.integer(round(choose(n, m)))/2 result <- rep(NA,count) #first do the original order o <- a sp <- split(o,n) result[1] <- sp$rab maxrb <- sp$rab maxAB <- sp$AB minrb <- sp$rab minAB <- sp$AB i <- 2 #now, do the rest while (i < (count+1)) { if (e < n - h) { h <- 1L e <- a[m] j <- 1L } else { e <- a[m - h] h <- h + 1L j <- 1L:h } a[m - h + j] <- e + j o <- x[a] sp <- split(o,n) result[i] <- sp$rab sumr <- sumr+ sp$rab if(sp$rab > maxrb) {maxrb <- sp$rab maxAB <- sp$AB} if(sp$rab < minrb) {minrb <- sp$rab minAB <- sp$AB} i <- i + 1L }} else { result <- rep(NA,n.sample) for (i in 1:n.sample) { o <- sample(n,n2) sp <- split(o,n) result[i] <- sp$rab sumr <- sumr+ sp$rab if(sp$rab > maxrb) {maxrb <- sp$rab maxAB <- sp$AB} if(sp$rab < minrb) { minrb <- sp$rab minAB <- sp$AB} } } if(is.null(n.sample)) {meanr <- sumr/count } else {meanr <- sumr/n.sample } meansp <- 2 * meanr/(1+meanr) if(raw) {results <- list(maxrb=maxrb,minrb=minrb,maxAB=maxAB,minAB=minAB,meanr=meanr,alpha=alpha,raw = result,Call = cl) } else {results <- list(maxrb=maxrb,minrb=minrb,maxAB=maxAB,minAB=minAB,meanr=meanr,alpha=alpha,Call=cl)} class(results) <- c("psych","split") results } psych/R/faCor.R0000644000176200001440000000510313761465401012764 0ustar liggesusers#find the correlation between two sets of factors extracted differently "faCor" <- function(r,nfactors=c(1,1),fm=c("minres","minres"),rotate=c("oblimin","oblimin"),scores=c("tenBerge","tenBerge"), adjust=c(TRUE,TRUE),use="pairwise", cor="cor",weight=NULL,correct=.5,Target=list(NULL,NULL)) { cl <- match.call() #find r if data matrix if (!isCorrelation(r)) { matrix.input <- FALSE #return the correlation matrix in this case n.obs <- dim(r)[1] # if given a rectangular matrix, then find the correlation or covariance #multiple ways of find correlations or covariances #added the weights option to tet, poly, tetrachoric, and polychoric June 27, 2018 switch(cor, cor = {r <- cor(r,use=use)}, cov = {r <- cov(r,use=use) covar <- TRUE}, wtd = { r <- cor.wt(r,w=weight)$r}, tet = {r <- tetrachoric(r,correct=correct,weight=weight)$rho}, poly = {r <- polychoric(r,correct=correct,weight=weight)$rho}, tetrachoric = {r <- tetrachoric(r,correct=correct,weight=weight)$rho}, polychoric = {r <- polychoric(r,correct=correct,weight=weight)$rho}, mixed = {r <- mixedCor(r,use=use,correct=correct)$rho}, Yuleb = {r <- YuleCor(r,,bonett=TRUE)$rho}, YuleQ = {r <- YuleCor(r,1)$rho}, YuleY = {r <- YuleCor(r,.5)$rho } ) } #do the factor 2 different ways if(fm[1]!="pca") {if(is.null(Target[[1]])) {f1 <- fa(r,nfactors=nfactors[1],fm=fm[1],rotate=rotate[1],scores=scores[1])} else {f1 <- fa(r,nfactors=nfactors[1],fm=fm[1],rotate=rotate[1],scores=scores[1],Target=Target[[1]]) } } else {f1 <- pca(r,nfactors=nfactors[1],rotate=rotate[1])} if(fm[2]!="pca") {if(is.null(Target[[2]])) {f2 <- fa(r,nfactors=nfactors[2],fm=fm[2],rotate=rotate[2],scores=scores[2])} else {f2 <- fa(r,nfactors=nfactors[2],fm=fm[2],rotate=rotate[2],scores=scores[2],Target=Target[[2]]) } } else {f2 <- pca(r,nfactors=nfactors[2],rotate=rotate[2])} #Find the interfactor correlations colnames(f1$weights) <- paste0("F",1:ncol(f1$weights)) colnames(f2$weights) <- paste0("F",1:ncol(f2$weights)) rf <- t(f1$weights) %*% r %*% f2$weights #adjust by factor variances rs1 <- diag(t(f1$weights) %*% r %*% f1$weights ) rs2 <- diag(t(f2$weights) %*% r %*% f2$weights ) if(adjust[1]) rf <- diag(1/sqrt(rs1)) %*% rf if(adjust[2]) rf <- rf %*% diag(1/sqrt(rs2)) rownames(rf) <- colnames(f1$loadings) colnames(rf) <- colnames(f2$loadings) fc <- factor.congruence(f1,f2) result <-list(Call=cl,r=rf,congruence=fc, f1=f1,f2=f2,rs1=rs1,rs2=rs2) class(result) <- c("psych","faCor") return(result) } psych/R/rescale.R0000644000176200001440000000030610775435113013347 0ustar liggesusers"rescale" <- function(x,mean=100,sd=15,df=TRUE) {if(df) {x <- data.frame(t(t(scale(x))*sd+mean)) } else {x <- t( t(scale(x))*sd +mean)} return(x) } #corrected April 3 to properly do matrix additionpsych/R/bifactor.R0000644000176200001440000000435313440041651013520 0ustar liggesusers#added checks for GPArotation even though we are already testing somewhere else #the first function finds the first derivative, the second finds the fit "vgbQ.bimin" <- function(L) { k <- dim(L)[2] L2 <- L^2 N <- matrix(1,k,k) diag(N) <- 0 L2N <- L2 %*% N v <- sum(L2 * L2N) G = 4 * L * L2N return(list(f=v,Gq=G)) } "vgQ.bimin" <- function(L) { L2 <- L[,-1] lvg <- vgbQ.bimin(L2) v <- lvg$f G <- lvg$Gq G <-cbind(G[,1],G) G[,1] <- 0 return(list(f=v,Gq=G)) } #adapted from Jennrich and Bentler 2011 #requires GPArotation "bifactor" <- function(L, Tmat=diag(ncol(L)), normalize=FALSE, eps=1e-5, maxit=1000){ if(requireNamespace('GPArotation')) {GPArotation::GPForth(L, Tmat=Tmat, normalize=normalize, eps=eps, maxit=maxit, method="bimin")} else {stop("Bifactor requires GPArotation")} } #the oblique case #requires GPArotation "biquartimin" <- function(L, Tmat=diag(ncol(L)), normalize=FALSE, eps=1e-5, maxit=1000){ if(requireNamespace('GPArotation')) {GPArotation::GPFoblq(L, Tmat=Tmat, normalize=normalize, eps=eps, maxit=maxit, method="bimin") } else {stop("biquartimin requires GPArotation")} } #this is a minor patch to the target function to allow it to have missing elements in the target so it more closely approximates the Michael Brown function "vgQ.targetQ" <- function (L, Target = NULL) { if (is.null(Target)) stop("argument Target must be specified.") Gq <- 2 * (L - Target) Gq[is.na(Gq)] <- 0 list(Gq = Gq, f = sum((L - Target)^2,na.rm=TRUE), Method = "Target rotation") } #these next two take advantage of the revised vgQ.targetQ to do oblique or orthogonal rotations "TargetQ" <- function(L, Tmat=diag(ncol(L)), normalize=FALSE, eps=1e-5, maxit=1000,Target=NULL) { if(requireNamespace('GPArotation')) {GPArotation::GPFoblq(L, Tmat=Tmat,normalize=normalize, eps=eps, maxit=maxit, method="targetQ",Target)} else {stop("TargetQ requires GPArotation")}} "TargetT" <- function(L, Tmat=diag(ncol(L)), normalize=FALSE, eps=1e-5, maxit=1000,Target=NULL) { if(requireNamespace('GPArotation')) {GPArotation::GPForth(L, Tmat=Tmat,normalize=normalize, eps=eps, maxit=maxit, method="targetQ",Target)} else {stop("TargetT requires GPArotation")}} psych/R/r.con.R0000644000176200001440000000062711446540667012765 0ustar liggesusers"r.con" <- function(rho,n,p=.95,twotailed=TRUE) { z <- fisherz(rho) if(n<4) {stop("number of subjects must be greater than 3")} se <- 1/sqrt(n-3) p <- 1-p if(twotailed) p<- p/2 dif <- qnorm(p) zlow <- z + dif*se zhigh <- z - dif*se ci <- c(zlow,zhigh) ci <- fisherz2r(ci) return(ci) } "r2t" <- function(rho,n) { return( rho*sqrt((n-2)/(1-rho^2))) } psych/R/structure.sem.R0000644000176200001440000002053013573007704014555 0ustar liggesusers#just structure.graph, without the graphics (graphics commented out) "structure.sem" <- function(fx,Phi=NULL,fy=NULL, out.file=NULL,labels=NULL,cut=.3,errors=TRUE,simple=TRUE,regression=FALSE) { xmodel <- fx ymodel <- fy if(!is.null(class(xmodel)) && (length(class(xmodel))>1)) { if((inherits(xmodel, "psych") &&( inherits(xmodel, "omega")))) { Phi <- xmodel$schmid$phi xmodel <- xmodel$schmid$oblique} else { if(inherits(xmodel,"psych") && (inherits(xmodel,"fa")) | (inherits(xmodel,"principal"))) { if(!is.null(xmodel$Phi)) Phi <- xmodel$Phi xmodel <- as.matrix(xmodel$loadings)} }} else { if(!is.matrix(xmodel) & !is.data.frame(xmodel) &!is.vector(xmodel)) { if(!is.null(xmodel$Phi)) Phi <- xmodel$Phi xmodel <- as.matrix(xmodel$loadings) } else {xmodel <- xmodel} } digits <- 2 if(!is.matrix(xmodel) ) {factors <- as.matrix(xmodel)} else {factors <- xmodel} #first some basic setup parameters num.y <- 0 #we assume there is nothing there num.var <- num.xvar <- dim(factors)[1] #how many x variables? if (is.null(num.var) ){num.var <- length(factors) num.factors <- 1} else { num.factors <- dim(factors)[2]} num.xfactors <- num.factors if(is.null(labels)) {vars <- xvars <- rownames(xmodel)} else { xvars <- vars <- labels} if(is.null(vars) ) {vars <-xvars <- paste("x",1:num.var,sep="") } fact <- colnames(xmodel) if (is.null(fact)) { fact <- paste("X",1:num.factors,sep="") } num.yfactors <- 0 if (!is.null(ymodel)) { if(is.list(ymodel) & !is.data.frame(ymodel) ) {ymodel <- as.matrix(ymodel$loadings)} else {ymodel <- ymodel} if(!is.matrix(ymodel) ) {y.factors <- as.matrix(ymodel)} else {y.factors <- ymodel} num.y <- dim(y.factors)[1] if (is.null(num.y)) { num.y <- length(ymodel) num.yfactors <- 1} else { num.yfactors <- dim(y.factors)[2] } yvars <- rownames(ymodel) if(is.null(yvars)) {yvars <- paste("y",1:num.y,sep="") } if(is.null(labels)) {vars <- c(xvars,yvars)} else {yvars <- labels[(num.xvar+1):(num.xvar+num.y)]} vars <- c(vars,yvars) yfact <- colnames(ymodel) if(is.null(yfact)) {yfact <- paste("Y",1:num.yfactors,sep="") } fact <- c(fact,yfact) num.var <- num.xvar + num.y num.factors <- num.xfactors + num.yfactors } sem <- matrix(rep(NA),6*(num.var*num.factors + num.factors),ncol=3) colnames(sem) <- c("Path","Parameter","Value") if(!regression) { #the normal condition is to draw a latent model k <- num.factors if (num.xfactors ==1) { for (i in 1:num.xvar) { sem[i,1] <- paste(fact[1],"->",vars[i],sep="") if(is.numeric(factors[i])) {sem[i,2] <- vars[i]} else {sem[i,2] <- factors[i] } } k <- num.xvar+1 } else { #end of if num.xfactors ==1 #all loadings > cut in absolute value k <- 1 for (i in 1:num.xvar) { for (f in 1:num.xfactors) { if((!is.numeric(factors[i,f] ) && (factors[i,f] !="0"))|| ((is.numeric(factors[i,f]) && abs(factors[i,f]) > cut ))) { sem[k,1] <- paste(fact[f],"->",vars[i],sep="") if(is.numeric(factors[i,f])) {sem[k,2] <- paste("F",f,vars[i],sep="")} else {sem[k,2] <- factors[i,f]} k <- k+1 } #end of if } } } if(errors) { for (i in 1:num.xvar) { sem[k,1] <- paste(vars[i],"<->",vars[i],sep="") sem[k,2] <- paste("x",i,"e",sep="") k <- k+1 } } } else { #the regression case if (title=="Structural model") title <- "Regression model" k <- num.var+1 yvars <- "Y1" } #now, if there is a ymodel, do it for y model if(!is.null(ymodel)) { if (num.yfactors ==1) { for (i in 1:num.y) { sem[k,1] <- paste(fact[1+num.xfactors],"->",yvars[i],sep="") if(is.numeric(y.factors[i] ) ) {sem[k,2] <- paste("Fy",yvars[i],sep="")} else {sem[k,2] <- y.factors[i]} k <- k +1 } } else { #end of if num.yfactors ==1 #all loadings > cut in absolute value for (i in 1:num.y) { for (f in 1:num.yfactors) { #if (!is.numeric(y.factors[i,f]) || (abs(y.factors[i,f]) > cut)) if((!is.numeric(y.factors[i,f] ) && (y.factors[i,f] !="0"))|| ((is.numeric(y.factors[i,f]) && abs(y.factors[i,f]) > cut ))) { sem[k,1] <- paste(fact[f+num.xfactors],"->",vars[i+num.xvar],sep="") if(is.numeric(y.factors[i,f])) { sem[k,2] <- paste("Fy",f,vars[i+num.xvar],sep="")} else {sem[k,2] <- y.factors[i,f]} k <- k+1 } #end of if } #end of factor } # end of variable loop } if(errors) { for (i in 1:num.y) { sem[k,1] <- paste(vars[i+num.xvar],"<->",vars[i+num.xvar],sep="") sem[k,2] <- paste("y",i,"e",sep="") k <- k+1 }} } #end of !is.null(ymodel) if (!is.null(labels)) {var.labels <- c(labels,fact) } if(!regression) { if(!is.null(Phi)) {if (!is.matrix(Phi)) Phi <- matrix(c(1,Phi,0,1),ncol=2) if(num.xfactors>1) {for (i in 2:num.xfactors) { for (j in 1:(i-1)) {if((!is.numeric(Phi[i,j] ) && (Phi[i,j] !="0"))|| ((is.numeric(Phi[i,j]) && abs(Phi[i,j]) > cut ))) { if(Phi[i,j] != Phi[j,i]){ sem[k,1] <- paste(fact[j],"->",fact[i],sep="") if (is.numeric(Phi[i,j])) {sem[k,2] <- paste("rF",j,"F",i,sep="")} else {sem[k,2] <- Phi[i,j] } } else { sem[k,1] <- paste(fact[i],"<->",fact[j],sep="") if (is.numeric(Phi[i,j])) {sem[k,2] <- paste("rF",i,"F",j,sep="")} else {sem[k,2] <- Phi[i,j] } } k <- k + 1 } } } } } #end of correlations within x set if(!is.null(ymodel)) { for (i in 1:num.xfactors) { for (j in 1:num.yfactors) { if((!is.numeric(Phi[i,j+num.xfactors] ) && (Phi[i,j+num.xfactors] !="0"))|| ((is.numeric(Phi[i,j+num.xfactors]) && abs(Phi[i,j+num.xfactors]) > cut ))) { # clust.graph <- addEdge( fact[j+num.xfactors],fact[i],clust.graph,1) # if (is.numeric(Phi[i,j+num.xfactors])) { edge.label[k] <- round(Phi[i,j+num.xfactors],digits)} else {edge.label[k] <- Phi[i,j+num.xfactors]} # edge.dir[k] <- "back" # edge.name[k] <- paste(fact[j+num.xfactors],"~",fact[i],sep="") sem[k,1] <- paste(fact[i],"->",fact[j+num.xfactors],sep="") if (is.numeric(Phi[i,j+num.xfactors])) {sem[k,2] <- paste("rX",i,"Y",j,sep="")} else {sem[k,2] <- Phi[i,j+num.xfactors] } k <- k + 1 } } } } } else {if(!is.null(Phi)) {if (!is.matrix(Phi)) Phi <- matrix(c(1,Phi,0,1),ncol=2) for (i in 2:num.xvar) { for (j in 1:(i-1)) { if(Phi[i,j] != Phi[j,i]){edge.dir[k] <- "back"} else {edge.dir[k] <- "both"} k <- k + 1 }} } } for(f in 1:num.factors) { sem[k,1] <- paste(fact[f],"<->",fact[f],sep="") sem[k,3] <- "1" k <- k+1 } model=sem[1:(k-1),] class(model) <- "mod" #to make for pretty output when using sem package -- suggested by John Fox return(model) } psych/R/harmonic.mean.R0000644000176200001440000000026113056556601014451 0ustar liggesusers"harmonic.mean" <- function(x,na.rm=TRUE,zero=TRUE) {if(!zero) {x[x==0] <- NA} if (is.null(nrow(x))) {1/mean(1/x,na.rm=na.rm) } else { 1/(apply(1/x,2,mean,na.rm=na.rm))} } psych/R/irt.item.diff.rasch.R0000644000176200001440000000044210474443236015474 0ustar liggesusers#steps towards a Rasch modeling program for IRT #first, estimate the item difficulties "irt.item.diff.rasch" <- function(items) { ncases <- nrow(items) item.mean <- colMeans(items,na.rm=TRUE) item.mean[item.mean<(1/ncases)] <- 1/ncases irt.item.diff.rasch <- log((1/item.mean)- 1) } psych/R/psych.R0000644000176200001440000000012114021205633013040 0ustar liggesusers"psych" <- function () {} # a dummy function to make it appear in the help menu psych/R/Yule.R0000644000176200001440000001002612441471704012645 0ustar liggesusers"Yule" <- function(x,Y=FALSE) { # if(!is.matrix(x)) {stop("x must be a matrix")} stopifnot(prod(dim(x)) == 4 || length(x) == 4) if (is.vector(x)) { x <- matrix(x, 2)} a <- x[1,1] b <- x[1,2] c <- x[2,1] d <- x[2,2] if (Y) {Yule <- (sqrt(a*d) - sqrt(b*c))/(sqrt(a*d)+sqrt(b*c))} else {Yule <- (a*d- b*c)/(a*d + b*c)} return(Yule) #Yule Q } "Yule.inv" <- function(Q,m,n=NULL) {#find the cells for a particular Yule value with fixed marginals if (length(m) > 2) { #this old way is for one correlation at a time with all 4 marginals - R1 <- m[1] R2 <- m[2] C1 <- m[3] C2 <- m[4] } else { #the better way is to specify just two marginals- allows better interface with matrices if(is.null(n)) {n<- 1} #do it as percentages R1 <- m[1] R2 <- n -R1 C1 <- m[2] C2 <- n - C1} f <- function(x) {(Q- Yule(c(x,R1-x,C1-x,R2-C1+x)))^2} xval <- optimize(f,c(min(R1-min(C2,R1),C1-min(R2,C1)),min(R1,C1))) R <- matrix(ncol=2,nrow=2) x <- xval$minimum R[1,1] <- x R[1,2] <- R1-x R[2,1] <- C1 -x R[2,2] <- R2-C1 + x return(R) } "Yule2poly" <- function(Q,m,n=NULL,correct=TRUE) { #find the phi equivalent to a Yule Q with fixed marginals .Deprecated("Yule2tet",msg="Yule2poly has been replaced by Yule2tet, please try again") t <- Yule.inv(Q,m,n=n) r <- tetrachoric(t,correct=correct)$rho return(r) } "Yule2tet" <- function(Q,m,n=NULL,correct=TRUE) { #find the phi equivalent to a Yule Q with fixed marginals t <- Yule.inv(Q,m,n=n) r <- tetrachoric(t,correct=correct)$rho return(r) } "Yule2phi" <- function(Q,m,n=NULL) { #find the phi equivalent to a Yule Q with fixed marginals t <- Yule.inv(Q,m,n=n) return(phi(t,digits=15))} "Yule2tetra" <- function(Q,m,n=NULL,correct=TRUE) { if(!is.matrix(Q) && !is.data.frame(Q)) {result <- Yule2tet(Q,c(m[1],m[2]),n=n,correct=correct) } else { nvar <- nrow(Q) if(nvar !=ncol(Q)) {stop('Matrix must be square')} if (length(m) !=nvar) {stop("length of m must match the number of variables")} result <- Q for(i in 2:nvar) { for (j in 1:(i-1)) { result[i,j] <- result[j,i] <- Yule2tet(Q[i,j],c(m[i],m[j]),n,correct=correct) } }} return(result) } "YuleBonett" <- function(x,c=1,bonett=FALSE,alpha=.05) { # if(!is.matrix(x)) {stop("x must be a matrix")} stopifnot(prod(dim(x)) == 4 || length(x) == 4) if (is.vector(x)) { x <- matrix(x, 2)} p <- x/sum(x) C <- c a <- p[1,1] b <- p[1,2] c <- p[2,1] d <- p[2,2] Rs <- rowSums(p) Cs <- colSums(p) if(bonett) {C <- .5 - (.5 - min(Rs,Cs))^2 } ad <- (a*d)^C bc <- (b*c)^C Yule <- (ad-bc)/(ad+bc) Ystar <- #See Bonett 2007 p 434 w <- (x[1,1] + .1)* (x[2,2] + .1)/((x[2,1] + .1)* (x[1,2] + .1)) #OR estimate Ystar <- (w^C -1)/(w^C+1) #this is the small cell size adjusted value vlQ <- (1/(x[1,1] + .1) + 1/ (x[2,2] + .1) + 1/(x[2,1] + .1)+ 1/ (x[1,2] + .1)) vQ <- (C^2/4)*(1-Ystar^2)^2 *vlQ #equation 9 tanhinv <- atanh(Ystar) upper <- tanh(atanh(Ystar) + qnorm(1-alpha/2) *sqrt( (C^2/4) *vlQ)) lower <- tanh(atanh(Ystar) - qnorm(1-alpha/2) *sqrt( (C^2/4) *vlQ)) result <- list(rho=Ystar,se=sqrt(vQ), upper=upper, lower=lower) class(result) <- c("psych","Yule") return(result) #Yule Q, Y, or generalized Bonett } #Find a correlation matrix of Yule correlations "YuleCor" <- function(x,c=1,bonett=FALSE,alpha=.05) { cl <- match.call() nvar <- ncol(x) rho <- matrix(NA,nvar,nvar) ci <- matrix(NA,nvar,nvar) zp <- matrix(NA,nvar,nvar) for(i in 1:nvar) { for(j in 1:i) { YB <- YuleBonett(table(x[,i],x[,j]),c=c,bonett=bonett,alpha=alpha) rho[i,j] <- rho[j,i] <- YB$rho ci[i,j] <- YB$lower ci[j,i] <- YB$upper zp[i,j] <- YB$rho/YB$se zp[j,i] <- 1-pnorm(zp[i,j]) }} colnames(rho) <- rownames(rho) <- colnames(ci) <- rownames(ci) <- colnames(x) result <- list(rho=rho,ci=ci,zp=zp,Call=cl) class(result) <- c("psych","yule") return(result) } psych/R/schmid.R0000644000176200001440000003735214060675250013212 0ustar liggesusers#corrected estimate of communality, May 21, 2007 #removed "x" from factanal call, June 14, 2007 #added ability to do 2 factors by treating them with equal loadings Jan 2008 #added use of simplimax rotation June 2008 #corrected the sign of group factors to match the original factors #modified November, 2014 to allow for covariances in addition to correlations. #also cleaned up the code to take advantage of switch "schmid" <- function (model, nfactors = 3, fm = "minres", digits=2,rotate="oblimin",n.obs=NA,option="equal",Phi=NULL,covar=FALSE,two.ok=FALSE,...) { cl <- match.call() #if Phi is not Null, then we have been given a factor matrix, otherwise #model is a correlation matrix, or if not, the correlation matrix is found #nfactors is the number of factors to extract if(!requireNamespace('GPArotation')) {stop("I am sorry, you need to have the GPArotation package installed")} #there are two types of input # 1: from a factor analysis and or rotation function with a matrix of loadings and a Phi matrix # 2: raw data or a correlation/covariance matrix (from e.g, omega) # 3 Input is the output of a factor analysis if(is.list(model)) {if((class(model)[1] == "psych") && (class(model)[2] == "fa")) {Phi <- model$Phi model <- model$loadings} else {stop("input is a list, but is not from of class 'fa' ")}} if(is.null(Phi)) { #the normal case normal.case <- TRUE nvar <-dim(model)[2] if(dim(model)[1] != dim(model)[2]) {n.obs <- dim(model)[1] if(covar) { model <- cov(model,use="pairwise")} else {model <- cor(model,use="pairwise") } } if (fm =="pc") { fact <- principal(model, nfactors,n.obs=n.obs,covar=TRUE,...) fm <- 'minres' #because we want to use factor analysis for the higher level factors message("The higher order factor is found using minres -- see the notes") } else {if ((fm == "pa") |(fm =="minres") | (fm =="wls") |(fm =="minres") |(fm =="ml")|(fm =="mle") |(fm =="gls") |(fm =="minchi") |(fm =="minrank")) {fact <- fa(model, nfactors,n.obs=n.obs,rotate="varimax",fm=fm,covar=covar) } else { stop("The method of factor extraction you specified is not available") }} orth.load <- loadings(fact) } else {model <- as.matrix(model) Phi <- as.matrix(Phi) fact <- model %*% Phi #find the orthogonal (structure) matrix from the oblique pattern and the Phi matrix orth.load <- fact #but this is not the correct factor solution ev <- eigen(Phi) orth.load <- model %*% ev$vector %*% sqrt(diag(ev$values)) colnames(orth.load) <- colnames(Phi) nfactors <- dim(fact)[2] normal.case <-FALSE} colnames(orth.load) <- paste("F",1:nfactors,sep="") if(nfactors == 1) { message("Omega_h for 1 factor is not meaningful, just omega_t") obminfact <-list(loadings= orth.load) factr <- 1 } else { #the normal case is nfactors > 2 switch(rotate, simplimax = {obminfact <- GPArotation::simplimax(orth.load)}, # promax = {obminfact <- Promax(orth.load) # rotmat <- obminfact$rotmat # Phi <- obminfact$Phi # }, promax = {#to match revised fa call pro <- kaiser(orth.load,rotate="Promax",...) #calling promax will now do the Kaiser normalization before doing Promax rotation obminfact <- pro rot.mat <- pro$rotmat Phi <- pro$Phi }, Promax = {obminfact <- Promax(orth.load) rotmat <- obminfact$rotmat Phi <- obminfact$Phi }, TargetQ = {obminfact <- do.call(rotate,list(orth.load,...)) loadings <- obminfact$loadings Phi <- obminfact$Phi }, cluster = {obminfact <- varimax(orth.load) obminfact <- target.rot(obminfact,...) loadings <- obminfact$loadings Phi <- obminfact$Phi }, target = {obminfact <- varimax(orth.load) obminfact <- target.rot(obminfact,...) loadings <- obminfact$loadings Phi <- obminfact$Phi }, oblimin = { obminfact <- try(GPArotation::oblimin(orth.load)) if(inherits(obminfact,as.character("try-error"))) {obminfact <- Promax(orth.load) #special case for examples with exactly 2 orthogonal factors message("\nThe oblimin solution failed, Promax used instead.\n") #perhaps no longer necessary with patch to GPForth and GPFoblq in GPArotation rotmat <- obminfact$rotmat Phi <- obminfact$Phi}}, geominQ = { obminfact <- try(GPArotation::geominQ(orth.load)) if(inherits(obminfact,as.character("try-error"))) {obminfact <- Promax(orth.load) #special case for examples with exactly 2 orthogonal factors message("\nThe geominQ solution failed, Promax used instead.\n") #perhaps no longer necessary with patch to GPForth and GPFoblq in GPArotation rotmat <- obminfact$rotmat Phi <- obminfact$Phi}}, bentlerQ = { obminfact <- try(GPArotation::bentlerQ(orth.load)) if(inherits(obminfact,as.character("try-error"))) {obminfact <- Promax(orth.load) #special case for examples with exactly 2 orthogonal factors message("\nThe bentlerQ solution failed, Promax used instead.\n") #perhaps no longer necessary with patch to GPForth and GPFoblq in GPArotation rotmat <- obminfact$rotmat Phi <- obminfact$Phi}}, targetQ = { obminfact <- try(GPArotation::targetQ(orth.load,...)) if(inherits(obminfact,as.character("try-error"))) {obminfact <- Promax(orth.load) #special case for examples with exactly 2 orthogonal factors message("\nThe targetQ solution failed, Promax used instead.\n") #perhaps no longer necessary with patch to GPForth and GPFoblq in GPArotation rotmat <- obminfact$rotmat Phi <- obminfact$Phi}}, biquartimin = {obminfact <- biquartimin(orth.load,...) loadings <- obminfact$loadings Phi <- obminfact$Phi rot.mat <- t(solve(obminfact$Th))} ) # # # if (rotate == "simplimax") {obminfact <- simplimax(orth.load)} else { # if((rotate == "promax") | (rotate == "Promax") ) {obminfact <- Promax(orth.load) # rotmat <- obminfact$rotmat # Phi <- obminfact$Phi # } else { # if(rotate=="TargetQ") {obminfact <- do.call(rotate,list(orth.load,...)) # loadings <- obminfact$loadings # Phi <- obminfact$Phi # } else { # # if ((rotate == "cluster") | (rotate == "target")) {obminfact <- varimax(orth.load) # obminfact <- target.rot(obminfact,...) # loadings <- obminfact$loadings # Phi <- obminfact$Phi # } else { # obminfact <- try(oblimin(orth.load)) # if(class(obminfact)== as.character("try-error")) {obminfact <- Promax(orth.load) #special case for examples with exactly 2 orthogonal factors # message("\nThe oblimin solution failed, Promax used instead.\n") #perhaps no longer necessary with patch to GPForth and GPFoblq in GPArotation # rotmat <- obminfact$rotmat # Phi <- obminfact$Phi} }} } # } } if(nfactors > 1) rownames(obminfact$loadings) <- attr(model,"dimnames")[[1]] if(!normal.case) { fload <- model factr <- Phi model <- fload %*% Phi %*% t(fload) diag(model) <- 1} else { fload <- obminfact$loadings #factr <- t(obminfact$Th) %*% (obminfact$Th) factr <- obminfact$Phi} if (nfactors ==1) {gload <- c(1) warning("Omega_h and Omega_asymptotic are not meaningful with one factor") } else { colnames(factr) <- rownames(factr) <- paste("F",1:nfactors,sep="") #make it a vector if (nfactors>2) { gfactor <- fa(factr,fm=fm) #The first factor of the factor intercorrelation matrix #added fm=fm March 5, 2011 gload <- loadings(gfactor) } else {gload<- c(NA,NA) #consider the case of two factors if(option=="equal") { gload[1] <- sqrt(abs(factr[1,2])) gload[2] <- sign(factr[1,2])*sqrt(abs(factr[1,2])) if(!two.ok) message("\nThree factors are required for identification -- general factor loadings set to be equal. \nProceed with caution. \nThink about redoing the analysis with alternative values of the 'option' setting.\n")} else { if(option=="first") { gload[1] <- 1 # gload[2] <- abs(factr[1,2]) gload[2] <- (factr[1,2]) message("\nThree factors are required for identification -- general factor loading set to be 1 for group factor 1. \nProceed with caution. \nThink about redoing the analysis with alternative values of the 'option' setting.\n")} else { gload[2] <- 1 # gload[1] <- abs(factr[1,2]) gload[1] <- (factr[1,2]) message("\nThree factors are required for identification -- general factor loadings are set to be 1 for group factor 2.\nProceed with caution. \nThink about redoing the analysis with alternative values of the 'option' setting.\n")} } } } gprimaryload <- fload %*% gload colnames(gprimaryload) <- "g" h2 <- diag(orth.load %*% t(orth.load)) # u2 <- 1 - h2 u2 <- diag(model) - h2 uniq <- diag(model)- fload^2 guniq <- diag(model) - gprimaryload^2 #Ig <- matrix(0, ncol = nfactors, nrow = nfactors) #diag(Ig) <- gload Ig <- diag(drop(gload)) #3/5/11 primeload <- fload %*% Ig g.percent <- gprimaryload^2/h2 colnames(g.percent) <- "p2" uniq2 <- diag(model) - uniq - primeload^2 uniq2[uniq2<0] <- 0 sm <- sign(fload) * sqrt(uniq2) #added June 1, 2010 to correctly identify sign of group factors F <- cbind(gprimaryload, sm) #the factor pattern matrix #the next two lines are actually not needed because the factors are orthogonal Structure <- t( Pinv(t(F) %*% F) %*% t(F) %*% orth.load %*% t(orth.load)) Phi.S <- t(Structure) %*% F %*% Pinv(t(F) %*% F) #this is the pseudo inverse Phi which is not the identity colnames(sm) <- paste0("F",1:nfactors,"*") if(!is.null(Phi)) { result <- list(sl = cbind(gprimaryload, sm,h2, u2,p =g.percent), orthog = orth.load, oblique=fload, phi =factr, gloading = gload,S.Phi = Phi.S,Call=cl)} else{ result <- list(sl = cbind(gprimaryload, sm,h2, u2,p=g.percent), orthog = orth.load, oblique=fload, phi =factr, gloading = gload,dof=fact$dof,objective=fact$criteria[1],STATISTIC=fact$STATISTIC,PVAL=fact$PVAL,RMSEA=fact$RMSEA,BIC=fact$BIC,rms = fact$rms,crms=fact$crms,n.obs=n.obs,scores=fact$scores,S.Phi = Phi.S,Call=cl )} class(result) <- c("psych" ,"schmid") return(result) } #Added June 20, 2018 to try to do Neils Waller's direct Schmid Leiman Procrustes <-function(L, Target=NULL){#Adapted from Niels Waller (2017) if(is.null(Target)) Target <- factor2cluster(L) tM1M2 <- t(Target) %*% L svdtM1M2 <- svd(tM1M2) P <- svdtM1M2$u Q <- svdtM1M2$v T <- Q%*%t(P) ## Orthogonally rotate L to Target return(list(loadings = L %*%T,rotation = T)) } #allowing to specify a number of rotations oblique.rotations <- function(rotate="oblimin",loadings,...){ if (rotate =="oblimin"| rotate=="quartimin" | rotate== "simplimax" | rotate =="geominQ" | rotate =="bentlerQ" |rotate == "targetQ" ) { if (!requireNamespace('GPArotation')) {warning("I am sorry, to do these rotations requires the GPArotation package to be installed") Phi <- NULL} else { ob <- try(do.call(getFromNamespace(rotate,'GPArotation'),list(loadings,...))) if(inherits(ob, as.character("try-error"))) {warning("The requested transformaton failed, Promax was used instead as an oblique transformation") ob <- Promax(loadings)} loadings <- ob$loadings Phi <- ob$Phi rot.mat <- t(solve(ob$Th))} } return(list(loadings=loadings,Phi=Phi)) } #direct Schmid Leiman adapted from Waller (2017) directSl <- function(m,nfactors=3,fm="minres",rotate="oblimin",cut=.3){ cl <- match.call() nvar <- ncol(m) if(isCorrelation(m)) {C <- m} else { C <- cor(m,use="pairwise")} f <- fa(C,nfactors=nfactors,fm=fm,rotate ='none') #unrotated solution #construct the target from the rotated solution f.obl <- oblique.rotations(rotate=rotate,loadings = f$loadings)$loadings targ <- factor2cluster(f.obl,cut=cut) #Waller adjustments to target and factor model targ <- cbind(g=rep(1,nvar),targ) f0 <- cbind(rep(0,nvar),f$loadings) direct <- Procrustes(f0,targ)$loadings #The Waller Procrustes solution colnames(direct) <- c("g",paste0("F",1:nfactors,"*")) #put some labels in class(direct) <- "loadings" results <- list(direct=direct,C=C,f=f,targ=targ,Call=cl) class(results) <- c("psych","direct") return(results) } omegaDirect <- function(m,nfactors=3,fm="minres",rotate="oblimin",cut=.3,plot=TRUE,main="Direct Schmid Leiman"){ cl <- match.call() dsl <- directSl(m=m,nfactors=nfactors,fm=fm,rotate=rotate,cut=cut) direct <- dsl$direct m <- dsl$C f <- dsl$f targ <- dsl$targ if(isCorrelation(m)) {C <- m} else { C <- cor(m,use="pairwise")} sum.g <- sum(direct[,1]) Vt <- sum(C) #the total variance in the matrix omega.g <-sum.g^2/Vt h2 <- rowSums(direct^2) H2 <- sum(h2) u2 <-1 - h2 U2 <- sum(u2) om.tot <-1 - U2/Vt #find subset omegas omg <- omgo <- omt<- rep(NA,nfactors+1) sub <- apply(direct,1,function(x) which.max(abs(x[2:(nfactors+1)]))) grs <- 0 for(group in( 1:nfactors)) { groupi <- which(sub==group) if(length(groupi) > 0) { Vgr <- sum(C[groupi,groupi]) gr <- sum(direct[groupi,(group+1)]) grs <- grs + gr^2 omg[group+1] <- gr^2/Vgr omgo[group+1] <- sum(direct[groupi,1])^2/Vgr omt[group+1] <- (gr^2+ sum(direct[groupi,1])^2)/Vgr }} omgo[1] <- sum(direct[,1])^2/sum(C) #omega h omg[1] <- grs/sum(C) #omega of subscales omt[1] <- om.tot om.group <- data.frame(total=omt,general=omgo,group=omg) rownames(om.group) <- colnames(direct)[1:(nfactors+1)] result <- list(loadings=direct,omega.g=omega.g,om.group=om.group,orth.f = f,Target=targ,Call=cl) class(result) <- c("psych" ,"omegaDirect") if(plot) omega.diagram(result,sort=TRUE,simple=FALSE,cut=cut,main=main) return(result) } psych/R/test.psych.r0000755000176200001440000002234513724200154014077 0ustar liggesusers#Quality control function to run through hard problems "test.psych" <- function(first=1,last=5,short=TRUE,all=FALSE,fapc=FALSE) { s1 <- datasets::USArrests # Violent Crime Rates by US State (4 variables) s2 <- datasets::attitude #The Chatterjee-Price Attitude Data s3 <- datasets::Harman23.cor$cov # Harman Example 2.3 8 physical measurements s4 <- datasets::Harman74.cor$cov # Harman Example 7.4 24 mental measurements s5 <- datasets::ability.cov$cov # 6 Ability and Intelligence Tests #convert covariance to correlation d5 <- diag(1/sqrt(diag(s5))) s5 <- d5 %*% s5 %*% d5 datasets <- list(s1,s2,s3,s4,s5) out <- list() for (i in first:last) { test.data <- datasets[[i]] pc <- principal(test.data) pc2 <- principal(test.data,2) if(i < 3) { fa2 <- fa(test.data,2) fp <- fa.parallel(test.data) vss2 <- VSS(test.data) vsspc <- VSS(test.data,fm="pc") } else { cat("\n Testing fa and VSS i =",i, "\n") fa2 <- fa(test.data,2,n.obs=200) cluster.plot(fa2) fp <- fa.parallel(test.data,n.obs=200) vss2 <- VSS(test.data,n.obs=200) vsspc <- VSS(test.data,fm="pc",n.obs=200) } ic <- ICLUST(test.data,plot=FALSE) if(requireNamespace('GPArotation')) {om <- omega(test.data,plot=FALSE)} else {warning("Omega requires the GPArotation package to be loaded") om <- NULL} fc <- factor.congruence(pc2,fa2) d <- describe(test.data) keys <- matrix(rep(0,dim(test.data)[2]*2),ncol=2) keys[,1] <- 1 keys[1:3,2] <- 1 rownames(keys) <- colnames(test.data) if( dim(test.data)[1] != dim(test.data)[2]) {test.score <- scoreItems(keys,test.data)} else {test.score <- cluster.cor(keys,test.data)} out <- list(out,paste("test",i),pc,pc2,fa2,fp,ic,om,fc,vss2,vsspc,d,test.score) } #end loop #a few more tests cat("\n Testing cor plot and sim.item\n") set.seed(42) #this way our simulaton will be consistent simple <- sim.item(nvar=24) circ <- sim.circ(nvar=24) cor.plot(cor(circ),colors=TRUE,zlim=c(-1,1),main="24 variables in a circumplex") simple.par <- fa.parallel(simple) fa.simple <- fa(simple,2) cor.plot(fa.simple,TRUE,n=4) #fa.simple.keys <- ICLUST.sort(fa.simple,keys=TRUE) #why this way # simple.scores <- scoreItems(fa.simple.keys$clusters,simple) fa.simple.keys <- factor2cluster(fa.simple) simple.scores <- scoreItems(fa.simple.keys,simple) pairs.panels(simple.scores$scores) cat("\n Test of sim.VSS\n") f4 <- sim.VSS() psych.d <- NULL #the next test, phi.demo, throws multiple warnings that are from the polycor package and can not be found #if (!require(polycor)) { warning("psycho.demo requires the polycor package") psych.d <- NULL } else {psych.d <- phi.demo() } cong <- sim.congeneric() if(all) { #test of factoring and scoring singular data -- fails on some platforms cat("\n Test of a singular matrix\n") #a test example of a singular matrix IRIS <- datasets::iris[,1:4] IRIS[,5] <- datasets::iris[,1]+datasets::iris[,2] f.iris <- fa(IRIS,5,scores=TRUE) #this is get around the failure of tenBerge for a singular matrix p.iris <- principal(IRIS,5,scores=TRUE) #this will fail if not using minres or pa } cat("\n Test of sim.circ\n") cluster.plot(fa(sim.circ(nvar=24),nfactors=2),title="two circumplex factors") pairs.panels(cong) #this section tests various functions that use Rgraphviz (if it is installed) if(FALSE) { #{if(require(Rgraphviz) && !FALSE) { fa.graph(fa(item.sim(16),2) ,title="Principal factor of a simple structure") ic.out <- ICLUST(s4,title="ICLUST of 24 Mental abilities") v9 <- omega(sim.hierarchical(),title="Omega with Schmid Leihman") omega.graph(v9,sl=FALSE,title="Omega with hierarchical factors") #set up the parameters for the structure graph X6 <- matrix(c("a","b","c",rep(0,6),"d","e","f"),nrow=6) colnames(X6) <- c("L1","L2") rownames(X6) <- c("x1","x2","x3","x4","x5","x6") Y3 <- matrix(c("u","w","z"),ncol=1) colnames(Y3) <- "Y" rownames(Y3) <- c("y1","y2","y3") phi21 <- matrix(c(1,0,"r1",0,1,"r2",0,0,1),ncol=3) colnames(phi21) <- rownames(phi21) <- c("L1","L2","Y") structure.graph(X6,phi21,Y3,title="Symbolic structural model") } else {warning("fa.graph, omega.graph, ICLUST.rgraph, structure.graph require Rgraphviz and were not tested") } fa.diagram(fa(item.sim(16),nfactors=2)) cat("\n Test of ICLUST\n") ic.out <- ICLUST(s4,title="ICLUST of 24 Mental abilities") v9 <- omega(sim.hierarchical(),title="Omega with Schmid Leihman") omega.diagram(v9,sl=FALSE,main="Omega with hierarchical factors") #set up the parameters for the structure graph X6 <- matrix(c("a","b","c",rep(0,6),"d","e","f"),nrow=6) colnames(X6) <- c("L1","L2") rownames(X6) <- c("x1","x2","x3","x4","x5","x6") Y3 <- matrix(c("u","w","z"),ncol=1) colnames(Y3) <- "Y" rownames(Y3) <- c("y1","y2","y3") phi21 <- matrix(c(1,0,"r1",0,1,"r2",0,0,1),ncol=3) colnames(phi21) <- rownames(phi21) <- c("L1","L2","Y") example.model <- structure.diagram(X6,phi21,Y3,main="Symbolic structural model") cat("\n Test of fa.extension \n") R <- cor(sim.item(16)) ss <- c(1,3,5,7,9,11,13,15) f <- fa(R[ss,ss],2) foe <- fa.extension(R[ss,-ss],f) fa.diagram(fa.results=f,fe.results=foe) #now test the iteration options and the rotation options in fa #not run by default for official testing if(fapc) { cat("\n Test of various factor solutions\n") data1 <- psychTools::bfi f3 <- fa(data1[1:15],3,n.iter=5) f3 <- fa(data1[1:15],3,n.iter=5,rotate="Varimax") f3 <- fa(data1[1:15],3,n.iter=5,rotate="varimax") f3 <- fa(data1[1:15],3,n.iter=5,rotate="quartimax") f3 <- fa(data1[1:15],3,n.iter=5,rotate="bentlerT") f3 <- fa(data1[1:15],3,n.iter=5,rotate="geominT") Target <- matrix(c(rep(1,5),rep(0,15),rep(1,5),rep(0,15),rep(1,5)),ncol=3) f3 <- fa(data1[1:15],3,n.iter=5,rotate="targetT",Target=Target) f3 <- fa(data1[1:15],3,n.iter=5,rotate="bifactor") f3 <- fa(data1[1:15],3,n.iter=5,rotate="TargetT",Target=Target) f3 <- fa(data1[1:15],3,n.iter=5,rotate="equamax") f3 <- fa(data1[1:15],3,n.iter=5,rotate="varimin") #f3 <- fa(data1[1:15],3,n.iter=5,rotate="specialQ") f3 <- fa(data1[1:15],3,n.iter=5,rotate="Promax") f3 <- fa(data1[1:15],3,n.iter=5,rotate="promax") f3 <- fa(data1[1:15],3,n.iter=5,rotate="cluster") f3 <- fa(data1[1:15],3,n.iter=5,rotate="biquartimin") Targ <- make.keys(15,list(f1=1:5,f2=6:10,f3=11:15)) Targ <- scrub(Targ,isvalue=1) #fix the 0s, allow the NAs to be estimated Targ <- list(Targ) #input must be a list f3 <- fa(data1[1:15],3,n.iter=5,rotate="TargetQ",Target=Targ) #Michael Brown's #f3 <- fa(data1[1:15],3,n.iter=5,rotate="specialQ") f3 <- fa(data1[1:15],3,n.iter=5,rotate="oblimin") f3 <- fa(data1[1:15],3,n.iter=5,rotate="quartimin") f3 <- fa(data1[1:15],3,n.iter=5,rotate="simplimax") f3 <- fa(data1[1:15],3,n.iter=5,rotate="geominQ") f3 <- fa(data1[1:15],3,n.iter=5,rotate="targetQ",Target=Target) f3 <- fa(data1[1:15],3,n.iter=5,rotate="bentlerQ") cat("\n Test of factoring and principal components \n") data2 <- as.data.frame(psychTools::ability) f1 <- fa(data2) fpoly <- fa(data2[1:10],2,n.iter=5,cor="poly") f1 <- fa(data2,n.iter=4) f1p <- fa(data2,n.iter=4,cor="tet") cat("\n Test of principal components \n") p3 <- principal(data1[1:15],3) p3 <- principal(data1[1:15],3,rotate="Varimax") p3 <- principal(data1[1:15],3,rotate="varimax") p3 <- principal(data1[1:15],3,rotate="quartimax") p3 <- principal(data1[1:15],3,rotate="bentlerT") p3 <- principal(data1[1:15],3,rotate="geominT") Target <- matrix(c(rep(1,5),rep(0,15),rep(1,5),rep(0,15),rep(1,5)),ncol=3) p3 <- principal(data1[1:15],3,rotate="targetT",Target=Target) p3 <- principal(data1[1:15],3,rotate="TargetT",Target=Target) p3 <- principal(data1[1:15],3,rotate="bifactor") p3 <- principal(data1[1:15],3,rotate="varimin") p3 <- principal(data1[1:15],3,rotate="bentlerT") p3 <- principal(data1[1:15],3,rotate="geominT") p3 <- principal(data1[1:15],3,rotate="equamax") p3 <- principal(data1[1:15],3,rotate="Promax") p3 <- principal(data1[1:15],3,rotate="promax") p3 <- principal(data1[1:15],3,rotate="cluster") p3 <- principal(data1[1:15],3,rotate="biquartimin") p3 <- principal(data1[1:15],3,rotate="equamax") cat("\n Test of target rotation \n") Targ <- make.keys(15,list(f1=1:5,f2=6:10,f3=11:15)) Targ <- scrub(Targ,isvalue=1) #fix the 0s, allow the NAs to be estimated Targ <- list(Targ) #input must be a list p3 <- principal(data1[1:15],3,rotate="TargetQ",Target=Targ) p3 <- principal(data1[1:15],3,rotate="oblimin") p3 <- principal(data1[1:15],3,rotate="quartimin") p3 <- principal(data1[1:15],3,rotate="simplimax") p3 <- principal(data1[1:15],3,rotate="geominQ") p3 <- principal(data1[1:15],3,rotate="biquartimin") p3 <- principal(data1[1:15],3,rotate="targetQ",Target=Target) p3 <- principal(data1[1:15],3,rotate="bentlerQ") cat("\n Test of principal components of polychorics \n") R <- polychoric(data2[1:10])$rho fpoly <- principal(R,2) #cor is not an option in principal f1 <- principal(data2) R <- tetrachoric(data2)$rho f1p <- principal(R) } out <- list(out,fa.simple,psych.d) if (!short) { return(out)} }#end function psych/R/comorbidity.R0000644000176200001440000000113313212275634014253 0ustar liggesusers"comorbidity" <- function(d1,d2,com,labels=NULL) { cl <- match.call() twobytwo <- matrix(c(com, d1-com,d2-com,1-d1-d2+com),ncol=2) if(is.null(labels)) { colnames(twobytwo) <- c("D1","d1") rownames(twobytwo) <- c("D2","d2")} else { colnames(twobytwo) <- c(labels[1],paste("-",labels[1],sep="")) rownames(twobytwo) <- c(labels[2],paste("-",labels[2],sep=""))} phi <- phi(twobytwo) Yule <- Yule(twobytwo) tetra<- tetrachoric(twobytwo) answer <- list(twobytwo=twobytwo,phi=phi,Yule=Yule,tetra=tetra,Call=cl) class(answer) <- c("psych","comorbid") return(answer) }psych/R/cortest.normal.R0000644000176200001440000000617513365443475014725 0ustar liggesusers"cortest.normal" <- function(R1,R2=NULL, n1=NULL,n2=NULL,fisher=TRUE) { cl <- match.call() if (dim(R1)[1] != dim(R1)[2]) {n1 <- dim(R1)[1] message("R1 was not square, finding R from data") R1 <- cor(R1,use="pairwise")} if(!is.matrix(R1) ) R1 <- as.matrix(R1) #converts data.frames to matrices if needed p <- dim(R1)[2] if(is.null(n1)) {n1 <- 100 warning("n not specified, 100 used") } if(is.null(R2)) { if(fisher) {R <- 0.5*log((1+R1)/(1-R1)) R <- R*R} else {R <- R1*R1} diag(R) <- 0 E <- (sum(R*lower.tri(R))) chisq <- E *(n1-3) df <- p*(p-1)/2 p.val <- pchisq(chisq,df,lower.tail=FALSE) } else { #end of 1 matrix test if (dim(R2)[1] != dim(R2)[2]) {n2 <- dim(R2)[1] message("R2 was not square, finding R from data") R2 <- cor(R2,use="pairwise")} if(!is.matrix(R2) ) R2 <- as.matrix(R2) if(fisher) { R1 <- 0.5*log((1+R1)/(1-R1)) R2 <- 0.5*log((1+R2)/(1-R2)) diag(R1) <- 0 diag(R2) <- 0 } R <- R1 -R2 #direct difference R <- R*R if(is.null(n2)) n2 <- n1 n <- (n1*n2)/(n1+n2) #why do I do this? should it be 2 * (n1*n2)/(n1+n2) or #n <- harmonic.mean(c(n1,n2)) #no, actually this gives the right results E <- (sum(R*lower.tri(R))) chisq <- E *(n-3) df <- p*(p-1)/2 p.val <- pchisq(chisq,df,lower.tail=FALSE) } result <- list(chi2=chisq,prob=p.val,df=df,Call=cl) class(result) <- c("psych", "cortest") return(result) } #version of 2008 #commented 2018 #the following is done for non symmetric matrices with the same logic #version of August 28,2011 #not yet ready for prime time "cortest.normal1" <- function(R1,R2=NULL, n1=NULL,n2=NULL,fisher=TRUE) { cl <- match.call() if(!is.matrix(R1) ) R1 <- as.matrix(R1) #converts data.frames to matrices if needed if(!is.matrix(R2) ) R2 <- as.matrix(R2) #converts data.frames to matrices if needed r <- dim(R1)[1] c <- dim(R1)[2] R1 <- 0.5*log((1+R1)/(1-R1)) R2 <- 0.5*log((1+R2)/(1-R2)) R <- R1 -R2 #direct difference R <- R*R if(is.null(n2)) n2 <- n1 n <- (n1*n2)/(n1+n2) #equally problematic E <- sum(R) chisq <- E *(n-3) df <- r*c p.val <- pchisq(chisq,df,lower.tail=FALSE) result <- list(chi2=chisq,prob=p.val,df=df,Call=cl) class(result) <- c("psych", "cortest") return(result) } #see cortest for another version test.cortest.normal <- function(n.var=10,n1=100,n2=1000,n.iter=100) { R <- diag(1,n.var) summary <- list() for(i in 1:n.iter) { x <- sim.correlation(R,n1) if(n2 >3 ) { y <- sim.correlation(R,n2) summary[[i]] <- cortest(x,y,n1=n1,n2=n2)$prob } else {summary[[i]] <- cortest(x,n1=n1)$prob } } result <- unlist(summary) return(result) } psych/R/omega.graph.R0000644000176200001440000001655112456461256014137 0ustar liggesusers#modified January 20, 2009 to create sem commands #modified May 30, 2008 to try to get arrows going both ways in the sl option. #this now works, but sometimes two lines don't have arrows. #Created May 20, 2007 #modified June 2 to clarify the Rgraphviz issue #modified July 12 to fix label problem #take the output from omega and graph it #fixed Sept 16 to draw sl solutions correctly "omega.graph" <- function(om.results,out.file=NULL,sl=TRUE,labels=NULL, size=c(8,6), node.font=c("Helvetica", 14), edge.font=c("Helvetica", 10), rank.direction=c("RL","TB","LR","BT"), digits=1,title="Omega", ...){ if(!requireNamespace('Rgraphviz')) {stop("I am sorry, you need to have the Rgraphviz package installed") #create several dummy functions to get around the "no visible global function definition" problem nodes <- function() {} addEdge <- function() {} subGraph <- function(){} } # if(!requireNamespace(graph)) {stop("I am sorry, you need to have the graph package installed") } if (sl) {factors <- as.matrix(om.results$schmid$sl) } else{factors <- as.matrix(om.results$schmid$oblique)} rank.direction <- match.arg(rank.direction) #first some basic setup parameters num.var <- dim(factors)[1] #how many variables? if (sl) {num.factors <- dim(factors)[2] -4 } else {num.factors <- dim(factors)[2]} gloading <- om.results$schmid$gloading vars <- paste("V",1:num.var,sep="") if (!is.null(labels)) {vars <- paste(labels)} else{vars <- rownames(factors) } if(sl) {fact <- c("g",paste("F",1:num.factors,"*",sep="")) } else {fact <- c("g",paste("F",1:num.factors,sep="")) } # e.g. "g" "F'1" "F2" "F3" clust.graph <- new("graphNEL",nodes=c(vars,fact),edgemode="directed") graph.shape <- c(rep("box",num.var),rep("ellipse",num.factors+1)) graph.rank <- c("sink", rep("same",num.var),rep("source",num.factors)) #this doesn't seem to make a difference names(graph.shape) <- nodes(clust.graph) names(graph.rank) <- nodes(clust.graph) if (sl) {edge.label <- rep("",num.var*2) #this basically just sets up the vectors to be the right size edge.dir <-rep("forward",num.var*2) # edge.arrows <-rep("open",num.var+num.factors) edge.arrows <-rep("open",num.var*2) edge.name <- rep("",num.var*2) names(edge.label) <- seq(1:num.var*2) names(edge.dir) <-rep("",num.var*2) #names(edge.arrows) <-rep("",num.var+num.factors) names(edge.arrows) <-rep("",num.var*2) sem <- matrix(rep(NA,6*(2*num.var + num.factors)),ncol=3) #used for sem diagram } else { edge.label <- rep("",num.var+num.factors) edge.name <- rep("",num.var+num.factors) edge.arrows <-rep("open",num.var+num.factors) edge.dir <-rep("forward",num.var*2) names(edge.label) <- seq(1:num.var+num.factors) names(edge.dir) <- seq(1:num.var+num.factors) names(edge.arrows) <- seq(1:num.var+num.factors) sem <- matrix(rep(NA,6*(num.var + num.factors)+3),ncol=3) #used for sem diagram } #show the cluster structure with ellipses if (sl) { l <- matrix(factors[,2:(num.factors+1)],ncol=num.factors) } else { l <- factors } m1 <- matrix(apply(t(apply(l, 1, abs)), 1, which.max), ncol = 1) if (sl) { k <- num.var for (i in 1:num.var) { clust.graph <- addEdge( vars[i],fact[1], clust.graph,1) edge.label[i] <- round(factors[i,1],digits) edge.name[i] <- paste(vars[i],"~",fact[1],sep="") edge.arrows[i] <- paste("open") edge.dir[i] <- paste("back") sem[i,1] <- paste(fact[1],"->",vars[i],sep="") sem[i,2] <- vars[i] } } else { k <- num.factors for (j in 1:num.factors) {clust.graph <- addEdge(fact[1], fact[j+1], clust.graph,1) #hierarchical g edge.label[j] <- round(gloading[j],digits) edge.name[j] <- paste(fact[1],"~",fact[j+1],sep="") sem[j,1] <- paste(fact[1],"->",fact[1+j],sep="") sem[j,2] <- paste("g",fact[1+j],sep="") } } for (i in 1:num.var) { clust.graph <- addEdge(fact[1+m1[i]], vars[i], clust.graph,1) edge.label[i+k] <- round(l[i,m1[i]],digits) edge.name[i+k] <- paste(fact[1+m1[i]],"~",vars[i],sep="") edge.arrows[i+k] <- paste("open") sem[i+k,1] <- paste(fact[1+m1[i]],"->",vars[i],sep="") sem[i+k,2] <- paste(fact[1+m1[i]],vars[i],sep="") } # edge.label[(i-1)*2+1] <- results[i,"r1"] # edge.name [(i-1)*2+1] <- paste(row.names(results)[i],"~", results[i,1],sep="") if(sl) { k <- num.var*2 for (i in 1:num.var) { sem[i+k,1] <- paste(vars[i],"<->",vars[i],sep="") sem[i+k,2] <- paste("e",i,sep="") } k <- k + num.var for (f in 1:num.factors) { sem[f+k,1] <- paste(fact[1+f],"<->",fact[1+f],sep="") sem[f+k,3] <- "1" } k <- k+ num.factors sem[k+1,1] <- paste("g <->g") sem[k+1,3] <- "1" k<- k+1 } else { k <- num.var + num.factors for (i in 1:num.var) { sem[i+k,1] <- paste(vars[i],"<->",vars[i],sep="") sem[i+k,2] <- paste("e",i,sep="") } k <- 2*num.var + num.factors for (f in 1:num.factors) { sem[f+k,1] <- paste(fact[f+1],"<->",fact[f+1],sep="") sem[f+k,3] <- "1" } k <- 2*num.var + 2*num.factors sem[k+1,1] <- paste("g<->g") sem[k+1,3] <- "1" k <- k+1 } nAttrs <- list() #node attributes eAttrs <- list() #edge attributes if(FALSE) { if (!is.null(labels)) {var.labels <- c(labels,fact) names(var.labels) <- nodes(clust.graph) nAttrs$label <- var.labels names(edge.label) <- edge.name } } names(edge.label) <- edge.name names(edge.dir) <- edge.name names(edge.arrows) <- edge.name nAttrs$shape <- graph.shape nAttrs$rank <- graph.rank eAttrs$label <- edge.label if(sl) { eAttrs$dir<- edge.dir eAttrs$arrowhead <- edge.arrows eAttrs$arrowtail<- edge.arrows } attrs <- list(node = list(shape = "ellipse", fixedsize = FALSE),graph=list(rankdir=rank.direction, fontsize=10,bgcolor="white" )) # obs.var <- subGraph(vars,clust.graph) # cluster.vars <- subGraph(fact,clust.graph) # observed <- list(list(graph=obs.var,cluster=TRUE,attrs=c(rank=""))) # plot(clust.graph, nodeAttrs = nAttrs, edgeAttrs = eAttrs, attrs = attrs,subGList=observed,main=title) plot(clust.graph,nodeAttrs = nAttrs,edgeAttrs = eAttrs,attrs = attrs,main=title) #not clear if the subGList makes any difference if(!is.null(out.file) ){toDotty(clust.graph,out.file,nodeAttrs = nAttrs, edgeAttrs = eAttrs, attrs = attrs) } colnames(sem) <- c("Path","Parameter","Initial Value") return(sem=sem[1:k,]) } psych/R/simCor.r0000644000176200001440000000455414330276413013233 0ustar liggesusers#heavily modified August 15, 2020 to allow for skewing of data, etc. "simCor" <- "sim.correlation" <- function(R,n=1000,data=FALSE,scale=TRUE, skew=c("none","log","lognormal","sqrt","abs"),vars=NULL,latent=FALSE,quant=NULL) { eX <- eigen(R) nvar <- ncol(R) if(missing(skew)) skew <- "none" if(missing(vars)) vars <- 1:nvar if(skew=="lognormal") {observed <- matrix(rlnorm(nvar * n),n)} else { observed <- matrix(rnorm(nvar * n),n) } #create the data if(latent) { #apply the transformations first -- probably not very helpful min.obs <- apply(observed,2,min) if(skew == "abs") { observed[,vars] <- abs(observed[,vars]) } else { observed <- t(t(observed)-min.obs + .1) } switch(skew, none = observed <- observed, log = observed[,vars] <- log(observed[,vars]), lognormal = observed[,vars] <- observed[,vars], sqrt = observed[,vars] <-sqrt(observed[,vars]), abs = observed <- observed #they are already done ) if(!missing(quant)) { cut <- quantile(observed,quant) observed[observed[,vars] < cut] <- 0 observed[observed[,vars] >= cut] <- 1} } observed <- -t( eX$vectors %*% diag(sqrt(pmax(eX$values, 0)), nvar) %*% t(observed)) #apply the eigen solution if(scale) observed <- scale(observed) if(!latent) { min.obs <- apply(observed,2,min) if(skew == "abs") { observed[,vars] <- abs(observed[,vars]) } else { observed <- t(t(observed)-min.obs + .1) } switch(skew, none = observed <- observed, log = observed[,vars] <- log(observed[,vars]), lognormal = observed[,vars] <- observed[,vars], sqrt = observed[,vars] <-sqrt(observed[,vars]), abs = observed[,vars] <- observed[,vars] ) if(scale) observed <- scale(observed) if(!missing(quant)) { cut <- rep(0,nvar) if(length(quant) == 1) quant <- rep(quant,nvar) for(i in 1:length(vars)) {cut[i] <- quantile(observed[,vars[i]],quant[i]) observed[observed[,vars[i]] < cut[i] ,vars[i]] <- -999 observed[observed[,vars[i]] >= cut[i],vars[i]] <- 1} } observed[observed== -999] <- 0 } if(!is.null(colnames(R)) ) {colnames(observed) <- colnames(R)} else {colnames(R) <- paste0("V",1:nvar)} if(data) {result <- observed} else { result <- cor(observed)} return(result)} psych/R/irt.fa.R0000644000176200001440000001004413571772627013127 0ustar liggesusers"irt.fa" <- function(x,nfactors=1,correct=TRUE,plot=TRUE,n.obs=NULL,rotate="oblimin",fm="minres",sort=FALSE,...) { cl <- match.call() if (is.matrix(x) | is.data.frame(x)) { if(is.null(n.obs)) n.obs <- dim(x)[1] nvar <- ncol(x) vname <- colnames(x) x <- as.matrix(x) if(!is.numeric(x)) {message("Converted non-numeric matrix input to numeric. \n Are you sure you wanted to do this?\n Please check your data") x <- matrix(as.numeric(x),ncol=nvar)} colnames(x) <- vname tx <- table(as.matrix(x)) if(dim(tx)[1] ==2) {tet <- tetrachoric(x,correct=correct) typ = "tet"} else {tet <- polychoric(x) typ = "poly"} r <- tet$rho tau <- tet$tau} else {if (!is.null(x$rho)) { r <- x$rho tau <- x$tau if(is.null(n.obs)) {n.obs <- x$n.obs} typ <- class(x)[2] if (typ == "irt.fa") typ <- "tet" } else {stop("x must be a data.frame or matrix or the result from tetra or polychoric")} } t <- fa(r,nfactors=nfactors,n.obs=n.obs,rotate=rotate,fm=fm,...) if(sort) {t <- fa.sort(t) #added 04/06/16 if(typ !="tet" ) {tau <- tau[t$order,] } else {tau <- tau[t$order] } } nf <- dim(t$loadings)[2] diffi <- list() #flag <- which(abs(t$loadings) > 1,arr.ind=TRUE) #this throws an error if a Heywood case - but actually don't do this for (i in 1:nf) {diffi[[i]] <- tau/sqrt(1-t$loadings[,i]^2) } discrim <- t$loadings/sqrt(1-t$loadings^2) if(any(is.nan(discrim))) { for (i in 1:nf) { bad <- which(is.nan(discrim[,i])) if(length( bad) > 0) { warning("A discrimination with a NaN value was replaced with the maximum discrimination for factor ", i, " and item(s) ",bad, "\nexamine the factor analysis object (fa) to identify the Heywood case. \nThe item informations are probably suspect as well for this factor. \nYou might try a different factor extraction technique. ") discrim[is.nan(discrim[,i]),i] <- max(discrim[,i],na.rm=TRUE) diffi[[i]][bad,] <- tau[bad,] } }} class(diffi) <- NULL class(discrim) <- NULL tl <- t$loadings class(tl) <- NULL irt <- list(difficulty=diffi,discrimination=discrim) nlevels <- dim(diffi[[1]])[2] #if(!is.null(nlevels)) { #colnames(coeff) <- c(paste("Location",1:nlevels,sep=""),"Discrimination",paste("tau",1:nlevels,sep=""),"Loading") } else { #colnames(coeff) <- c("Location","Discrimination","tau","Loading")} result <- list(irt=irt,fa = t,rho=r,tau=tau,n.obs=n.obs,Call=cl) switch(typ, tet = { class(result) <- c("psych","irt.fa")}, tetra ={class(result) <- c("psych","irt.fa")}, poly = {class(result) <- c("psych","irt.poly")}, irt.poly = {class(result) <- c("psych","irt.poly")}) if(plot) {pr <- plot(result) result$plot <- pr} return(result) } #convert a factor analysis output to an IRT output #December 9, 2012 #modifed June 5, 2016 to allow sorted output to work "fa2irt" <- function(f,rho,plot=TRUE,n.obs=NULL) { cl <- match.call() tau <- rho$tau if(!is.null(f$order)) {if (!is.null(ncol(tau))) { tau <- tau[f$order,] } else {tau <- tau[f$order] }} #adjust for sorting r <- rho$rho nf <- ncol(f$loadings) diffi <- list() #flag <- which(abs(t$loadings) > 1,arr.ind=TRUE) #this throws an error if a Heywood case for (i in 1:nf) {diffi[[i]] <- tau/sqrt(1-f$loadings[,i]^2) } discrim <- f$loadings/sqrt(1-f$loadings^2) if(any(is.nan(discrim))) {bad <- which(is.nan(discrim),arr.ind=TRUE) if(length(bad) > 0) { warning("An discrimination with a NaN value was replaced with the maximum discrimination for item(s) ",bad, "\nexamine the factor analysis object (fa) to identify the problem") } for (i in 1:nf) { discrimin[is.nan(discrim[,i])] <- max(discrimin[,i],na.rm=TRUE) }} irt <- list(difficulty=diffi,discrimination=discrim) result <- list(irt=irt,fa = f,rho=r,tau=tau,n.obs=n.obs,Call=cl) if(inherits(rho[2],"poly" )) {class(result) <- c("psych","irt.poly") } else {class(result) <- c("psych","irt.fa")} if(plot) {pr <- plot(result) result$plot <- pr} return(result) }psych/R/block.random.R0000644000176200001440000000224511364206714014304 0ustar liggesusers"block.random" <- function(n,ncond=NULL) { if(is.null(ncond)) {ncond <- 2 IVs <- 1 conditions <- c(ncond) } else { if (length(ncond) < 2) { IVs<- 1 conditions <- c(ncond)} else { IVs <- length(ncond) conditions <- ncond ncond <- prod(ncond)} } if(floor(n/ncond) * ncond != n) {stop("number of subjects much be a multiple of number of conditions")} blocks <- matrix(rep(NA,n*(1+IVs)),ncol=1+IVs) rcond <- rep(NA,n) if(is.null(names(conditions))) {colnames(blocks) <- c("blocks",paste("IV",1:IVs,sep=""))} else {colnames(blocks) <- c("blocks",names(conditions))} rownames(blocks) <- paste("S",1:n,sep="") for (block in 1:(n/ncond)) { blocks[((block-1)*ncond+1):(block*ncond),1] <- block rcond [((block-1)*ncond+1):(block*ncond)] <- sample(ncond,replace=FALSE) } for (i in 1:IVs) {if(i<2) { blocks[,i+1]<- ceiling( (rcond %% conditions[i] + 1))} else { blocks[,i+1]<- ceiling( (rcond %% prod(conditions[1:i]) +1 ) /prod(conditions[1:(i-1)])) }} return(blocks)}psych/R/phi.demo.R0000644000176200001440000000331214444460575013443 0ustar liggesusers"phi.demo" <- function(n=1000,r=.6 ,cuts =c(-2,-1,0,1,2) ) { #simulate correlation matrix with variable cut points -- psychometric demo #make up some correlations with different cut points latent <-rnorm(n) #make up some random normal theta scores err <- rnorm(n) #random normal error scores observed <- latent*(r) + err*sqrt(1-r*r) #observed = T + E #convert to 0/1 with different values of cuts trunc<- matrix(rep(observed,length(cuts)),ncol=length(cuts)) # for (i in 1:length(cuts)) { # trunc[observed > cuts[i],i] <- 1 # trunc[observed < cuts[i],i] <- 0} # for(i in 1:n) {for (j in 1:length(cuts)) {trunc[i,j] <- rbinom(1,1,trunc[i,j] > cuts[j])}} # for(i in 1:n) {for (j in 1:length(cuts)) {trunc[i,j] <-logistic(observed] > cuts[j])}} for (j in 1:length(cuts)) {trunc[,j] <- (trunc[,j] > cuts[j]) } d.mat<- data.frame(latent,observed,trunc) #combine into a data frame pairs.panels(d.mat,main="Phi coefficients for extreme cut point normal data") trunc.cor<- cor(d.mat) #find the Pearson correlations freq <- apply(d.mat,2,mean) #find the frequencies of scores #first demonstrate the tetrachoric function tetra <- tetrachoric(d.mat[,3:7],correct=FALSE) yule <- YuleCor(trunc) for (i in 4:length(d.mat)) { for (j in 3:i) { trunc.cor[j,i]<- phi2tetra(trunc.cor[i,j],c(freq[i],freq[j])) }} result <- list(tetrachoric=tetra, phis=trunc.cor, Yule = yule,data = d.mat) class(result) <- c('psych','phi.demo') return(result) } print_psych.phi <- function(x,digits=2) { print(x$tetrachoric) cat("\nPearson (phi) below the diagonal, phi2tetras above the diagonal\n") print(round(x$phis,digits)) cat("\nYule correlations") print(x$Yule) }psych/R/polychor.matrix.R0000644000176200001440000000111012154733570015066 0ustar liggesusers "Yule2poly.matrix" <- function(x,v) { .Deprecated("Yule2tetra", msg="This function has been replaced by Yule2tetra") } "phi2poly.matrix" <- function(x,v) { .Deprecated("phi2tetra",msg="This function has been replaced by phi2tetra") } "Yule2phi.matrix" <- function(x,v) { .Deprecated("Yule2phi",msg="This function has been replaced by Yule2phi") } #revised August 29, 2010 "poly.mat" <- function(x,short=TRUE,std.err=FALSE,ML=FALSE) { .Deprecated("polychoric",msg="poly.mat is deprecated. Please use the polychoric function instead.") return(polychoric(x)) } psych/R/ICLUST.graph.R0000644000176200001440000000625211020353750014030 0ustar liggesusers#created August 4, 2006 #last revised September 5, 2006 #create dot file for graphviz from ICLUST output #inspired by sem.path.diagram by J. Fox "ICLUST.graph" <- function(ic.results, out.file,min.size=1,short=FALSE,labels=NULL, size=c(8,6), node.font=c("Helvetica", 14), edge.font=c("Helvetica", 12), rank.direction=c("RL","TB","LR","BT"), digits=2,title="ICLUST", ...){ if(!missing(out.file)){ out <- file(out.file, "w") on.exit(close(out)) } else out <- stdout() results <- ic.results$results if (length(labels)==0) { var.labels <- rownames(ic.results$loadings)} else {var.labels=labels} # if(dim(var.labels)[1] < dim(var.labels)[2]) {var.labels <- t(var.labels)} clusters <- as.matrix(ic.results$clusters) #if(length(clusters)==length(var.labels) ) {clusters <- as.matrix(clusters)} num <- nrow(results) if (short) {var.labels <- paste("V",1:nrow,(var.labels),sep="")} rank.direction <- match.arg(rank.direction) #first some basic setup parameters cat( file=out,paste('digraph ICLUST', ' {\n', sep="")) cat(file=out, paste(' rankdir=', rank.direction, ';\n', sep="")) cat(file=out, paste(' size="',size[1],',',size[2],'";\n', sep="")) cat(file=out, paste(' node [fontname="', node.font[1], '" fontsize=', node.font[2], ' shape=box, width=2];\n', sep="")) cat(file=out, paste(' edge [fontname="', edge.font[1], '" fontsize=', edge.font[2], '];\n', sep="")) cat(file=out, paste(' label = "' ,title,'"; fontsize=20;\n', sep="")) #create the items as boxes #add the sign from the clusters num.var <- nrow(results)+1 #how many variables? if (num.var > dim(clusters)[1]) {num.var <- dim(clusters)[1]} for (i in 1:num.var) { if (max(clusters[i,]) > 0 ) { cat(file=out,paste('V',i,' [label = "',var.labels[i], '"];\n', sep="")) } else { cat(file=out,paste('V',i,' [label = "-',var.labels[i], '"];\n', sep="")) } } #show the cluster structure with ellipses cat(file=out,paste('node [shape=ellipse, width ="1"];\n', sep="")) #draw the edges for (i in 1:num) {if(results[i,1]>0) { #avoid printing null results cat(file=out,paste(row.names(results)[i], '-> ', results[i,1], ' [ label = ',round(results[i,"r1"],digits),' ];\n', sep="")) cat(file=out,paste(row.names(results)[i], '-> ', results[i,2], ' [ label = ',round(results[i,"r2"],digits),' ];\n', sep="")) }} #label the clusters with alpha and beta for (i in 1:num) {if(results[i,1]>0) { #don't print blank results if (results[i,"size"] > min.size) { cat(file=out,paste(row.names(results)[i], ' [label = "',row.names(results)[i],'\\n alpha= ',round(results[i,"alpha"],digits),'\\n beta= ' ,round(results[i,"beta"],digits),'\\nN= ',results[i,"size"], '"] ;\n', sep="")) } else {cat(file=out,paste(row.names(results)[i],' ;\n', sep="")) } #short names for small clusters }} #keep the boxes all at the same rank (presumably the left side) cat(file=out, paste('{ rank=same;\n', sep="")) for (i in 1:num.var) { cat(file=out,paste('V',i,';', sep="")) } cat(file=out, paste('}}', sep="")) # we are finished } # end of ICLUST.graph psych/R/glb.R0000644000176200001440000001334414503654075012506 0ustar liggesusers# an attempt at finding the worst and best splits, beta is worst split (from ICLUST) "glb" <- function(r,key=NULL) { cl <- match.call() nvar <- dim(r)[2] if(dim(r)[1] != dim(r)[2]) {r <- cor(r,use="pairwise")} else {r <- cov2cor(r)} #make sure it is a correlation matrix not a covariance or data matrix if(is.null(colnames(r))) { rownames(r) <- colnames(r) <- paste("V",1:nvar,sep="") } m <- (1-r)/2 diag(m) <- 1 m.names <- colnames(m) if (!is.null(key)) { m <- diag(key) %*% m %*% diag(key) colnames(m) <- m.names #flip items if we choose to do so flip <- FALSE #we do this if we specify the key } else {key <- rep(1,nvar) } signkey <- strtrim(key,1) signkey[signkey=="1"] <- "" m.names <- paste(m.names,signkey,sep="") colnames(m) <- rownames(m) <- m.names worst <- ICLUST(r,2,plot=FALSE) keys <- worst$p.sorted$cluster best <- ICLUST(m,2,plot=FALSE,SMC=FALSE) keys <- matrix(rep(0,nvar*2),ncol=2) keys <- best$p.sorted$cluster m1 <- r diag(m1) <- 0 best.kmeans <- kmeans(m,2,nstart=10) keys.kmean <- matrix(rep(0,nvar*2),ncol=2) for(i in 1:nvar) { keys.kmean[i,best.kmeans$cluster[i]] <- 1 } f1 <- fa(r) #one factor solution load <- f1$loadings ord.load <- order(load) key.fa <- matrix(rep(0,nvar*2),ncol=2) for (i in 1:nvar) { key.fa[ord.load[i],1] <- i %% 2 key.fa[ord.load[i],2] <- 1 - key.fa[ord.load[i],1] } f2 <- fa(r,2,SMC=FALSE) #two factor solution load <- f2$loadings key.fa2 <- matrix(rep(0,nvar*2),ncol=2) key.fa2[,1] <- (load[,1] > load[,2]) + 0 key.fa2[,2 ] <- 1- key.fa2[,1] e <- eigen(r)$values[1] alpha.pc <- 1-1/e glb.fa <- glb.fa(r) #based upon as many factors as possible, sort of omega total glb.nf <- glb.fa$numf keys <- cbind(worst$p.sorted$cluster,keys,keys.kmean,key.fa,key.fa2) colnames(keys) <- c("IC1","IC2","ICr1","ICr2","K1","K2","F1","F2","f1","f2") covar <- t(keys) %*% r %*% keys #matrix algebra is our friend var <- diag(covar) sd.inv <- 1/sqrt(var) ident.sd <- diag(sd.inv,ncol = length(sd.inv)) cluster.correl <- ident.sd %*% covar %*% ident.sd #convert covariances to correlations beta <- cluster.correl[2,1] *2 /(1+cluster.correl[2,1]) glbIC <- cluster.correl[3,4] *2 /(1+cluster.correl[3,4]) glb2 <- cluster.correl[5,6] * 2/(1+ cluster.correl[5,6] ) glb3 <- cluster.correl[7,8] * 2/(1+cluster.correl[7,8]) beta.fa <- cluster.correl[9,10] * 2/(1+cluster.correl[9,10]) glb.max <- max(glbIC,glb2,glb3) sum.smc <- sum(smc(r)) sum.r <- sum(r) gamma <- (sum.r+sum.smc-sum(diag(r)))/sum.r tenberg <- tenberge(r) result <- list(glb.fa=glb.fa,nf=glb.nf, beta = beta,beta.factor = beta.fa,alpha.pc = alpha.pc, glb.max = glb.max, glb.IC =glbIC, glb.Km = glb2, glb.Fa =glb3, r.smc = gamma, tenberge=tenberg, keys=keys,Call=cl) class(result) <- c("psych","glb") return(result) } #moved to print.psych # print.psych.glb <- function(x,digits=2,short=TRUE) { # cat("Call: ") # print(x$Call) # cat("\nEstimates of the Greatest Lower Bound for reliability, based on factor and cluster models") # # cat("\nGLB estimated from factor based communalities = ", round(x$glb.fa$glb,digits) ,"with ",x$nf, " factors.") # cat("\nUse glb.fa to see more details ") # cat("\n Various estimates based upon splitting the scale into two (see keys for the various splits)") # cat("\nBeta = ", round(x$beta,digits) , "\nBeta fa",round(x$beta.fa,digits), " This is an estimate of the worst split half reliability") # cat("\nKmeans clusters for best split ",round(x$glb.Km,digits=2)) # cat("\nCluster based estimates \nglb.IC = ",round(x$glb.IC,digits)) # cat("\nglb.max ", round(x$glb.max,digits),"Is the maximum of these estimates") # cat("\n alpha-PC = ", round(x$alpha.pc,digits),"An estimate of alpha based on eignvalues") #\nglb.IC) = " ,round(x$glb.IC,digits)) #"\nL5 = ", round(x$lambda.5,digits), "\nL6 (smc) = " ,round(x$lambda.6,digits), "\n") # cat("\nTenBerge bounds \nmu0 = ",round(x$tenberge$mu0,digits), "mu1 = ", round(x$tenberge$mu1,digits), "mu2 = " ,round(x$tenberge$mu2,digits), "mu3 = ",round(x$tenberge$mu3,digits) , "\n") # cat("\nestimated greatest lower bound based upon splitHalf = ", round(x$glb.Fa,digits),"\n") # # if(!short) {cat("\n Various ways of keying the results\n") # x$keys} else {cat("\nUse short = FALSE to see the various ways of splitting the scale")} # # } #find the communalties base upon as many factors as possble "glb.fa" <- function(r,key= NULL){ cl <- match.call() #for eventual fancy printing with the call listed nvar <- dim(r)[2] #find a correlation matrix if using a data matrix if(dim(r)[1] != dim(r)[2]) {r <- cor(r,use="pairwise")} else { if(!is.matrix(r)) r <- as.matrix(r) r <- cov2cor(r)} #make sure it is a correlation matrix not a covariance or data matrix if(is.null(colnames(r))) { rownames(r) <- colnames(r) <- paste("V",1:nvar,sep="") } if (!is.null(key)) { key <- as.vector(key) r <- diag(key) %*% r %*% diag(key) flip <- FALSE #we do this if we specify the key } else {key <- rep(1,nvar) } nv <- dim(r)[1] #how many variables f1 <- fa(r) #factor it #first find the eigen values of the factor model nf <-length(which(f1$values > 0)) #how many are real df <- nv * (nv-1)/2 - nf*nv + nf*(nf-1)/2 #check for degrees of freedom if (df < 0 ) nf <- nf-1 fn <- fa(r,nf,rotate="none") rr <- r diag(rr) <- fn$communality #based upon nf factors glb <- sum(rr)/sum(r) return(list(glb=glb,communality = fn$communality,numf = nf,Call=cl)) } psych/R/reverseKey.R0000644000176200001440000000064612205434363014060 0ustar liggesusers#written by David Stanley reverseKey=function(itemNames,itemNames2ReverseKey) { lengthItems=length(itemNames) lengthItemsUnique=length(unique(itemNames)) if (lengthItems!=lengthItemsUnique) { print("Item names are not unique: reverseKey will not work") return(NA) } keyValues=rep(1,lengthItems) matchKey=!is.na(match(itemNames,itemNames2ReverseKey)) keyValues[matchKey]=-1 return(keyValues) }psych/R/congeric.sim.R0000644000176200001440000000134111121644264014304 0ustar liggesusers"sim.congeneric" <- function(N = 1000, loads = c(0.8, 0.7, 0.6, 0.5), err=NULL, short=TRUE) { n <- length(loads) loading <- matrix(loads, nrow = n) error <- diag(1, nrow = n) if (!is.null(err)) {diag(error) <- err} else { diag(error) <- sqrt(1 - loading^2) } pattern <- cbind(loading, error) colnames(pattern) <- c("theta", paste("e", seq(1:n), sep = "")) rownames(pattern) <- c(paste("V", seq(1:n), sep = "")) model <- pattern %*% t(pattern) latent <- matrix(rnorm(N * (n + 1)), ncol = (n + 1)) observed <- latent %*% t(pattern) colnames(latent) <- c("theta", paste("e", seq(1:n), sep = "")) if (short) {return(model)} else {result <- list(model=model,pattern=pattern,observed=observed,latent=latent) return(result)} }psych/R/target.rot.R0000644000176200001440000000234612475366660014041 0ustar liggesusers"target.rot" <- function (x,keys=NULL) { if(!is.matrix(x) & !is.data.frame(x) ) { if(!is.null(x$loadings)) x <- as.matrix(x$loadings) } else {x <- x} if (ncol(x) < 2) return(x) dn <- dimnames(x) if(is.null(keys)) {Q <- factor2cluster(x)} else {Q <- keys} Q <- as.matrix(Q) if(dim(Q)[2] < 2) {stop("Cluster structure produces 1 cluster. Rotation is not meaningful with less than 2 factors")} U <- lm.fit(x, Q)$coefficients d <- diag(solve(t(U) %*% U)) U <- U %*% diag(sqrt(d)) dimnames(U) <- NULL z <- x %*% U ui <- solve(U) Phi <- ui %*% t(ui) dimnames(z) <- dn class(z) <- "loadings" result <- list(loadings = z, rotmat = U,Phi = Phi) class(result) <- c("psych","fa") return(result) } #Based upon Promax which was taken from the promax function with the addition of returning the angles between factors #based upon a suggestion to the R-help news group by Ulrich Keller and John Fox. #if keys is null, this is the Promax function #if keys are not null, this becomes a targeted rotation function similar to that suggested by Michael Brown #created April 6, 2009 with the assistance of Pat Shrout and Steve Miller #a better model is to call TargetQ psych/R/score.multiple.choice.R0000644000176200001440000001171213727764046016144 0ustar liggesusers "score.multiple.choice" <- function(key,data,score=TRUE,totals=FALSE,ilabels=NULL, missing=TRUE,impute="median", digits=2,short=TRUE,skew=FALSE) { #convert a data matrix or data with multiple choice responses to correct/incorrect cl <- match.call() if(!is.matrix(data)) {if(!is.data.frame(data)) {stop("data must be either a data frame or matrix!")} else data <- as.matrix(data)} nvar <- dim(data)[2] response.freq <- response.frequencies(data) alternatives <- dim(response.freq)[2] if(length(key)==nvar) { items <- t(t(data)==key[]) #scores as t/f items <- items + 0 #converts t/f to 1/0 } } else {stop("key must have as many elements as columns of 'data' ")} if (score) { if(skew) {item.stats <- describe(items,ranges=FALSE,skew=skew,fast=FALSE)[,2:7] } else { #added the fast=FALSE on 2/25/16 in response to problem with large data sets reported by Rodrigo Travitzki item.stats <- describe(items,ranges=FALSE,skew=skew,fast=TRUE)[,2:4] } miss.rep <- rowSums(is.na(items)) if(missing) { miss <- which(is.na(items),arr.ind=TRUE) if(impute=="mean") { item.means <- colMeans(items,na.rm=TRUE) #replace missing with means items[miss]<- item.means[miss[,2]]} else { item.med <- apply(items,2,median,na.rm=TRUE) #or medians items[miss]<- item.med[miss[,2]]} } keys <- rep(1,nvar) #now, score the items as the sum of correct scores <- rowSums(items,na.rm=TRUE) slabels <- colnames(keys) if (is.null(slabels)) { if (totals) {slabels<- paste("Totals") } else { slabels <- paste("Averages")} } names(scores) <- slabels r.items <- cov(items,use="pairwise") sum.item.var <- tr(r.items) var.scales <- sum(r.items) alpha.scale <- (var.scales - sum.item.var)*nvar/((nvar-1)*var.scales) av.r <- alpha.scale/(nvar - alpha.scale*(nvar-1)) #alpha 1 = average r item.cor <- cor(items,scores,use="pairwise") #this does not correct for item overlap if(is.null(ilabels)) {ilabels <- paste("I",1:nvar,sep="")} rownames(item.cor) <- ilabels if (!totals) {scores <- scores/nvar } item.stats <- cbind(key,response.freq,item.cor,item.stats) colnames(item.stats)[alternatives+2] <- "r" if(short) {results <- list(item.stats=round(item.stats,digits),alpha=round(alpha.scale,digits), av.r=round(av.r,digits),Call=cl)} else if (sum(miss.rep) >0) {results <-list(scores=scores,missing = miss.rep,item.stats=round(item.stats,digits),alpha=round(alpha.scale,digits), av.r=round(av.r,digits))} else{ results <- list(scores=scores,item.stats=item.stats,alpha=round(alpha.scale,digits), av.r=round(av.r,digits),Call=cl)} class(results) <- c("psych","mchoice") return(results) } else {return (items)} } #introduce a function to get cell frequencies and compensate for possible different number of response alternatives "response.frequencies" <- function(items,max=10,uniqueitems=NULL) {responseFrequency(items=items,max=max,uniqueitems=uniqueitems)} "responseFrequency" <- function (items, max = 10,uniqueitems=NULL) { min.item <- apply(items,2,function(x) min(x,na.rm=TRUE)) max.item <- apply(items,2,function(x) max(x,na.rm=TRUE)) select <- (max.item - min.item) < (max +1) items <- items[,select] if(is.null(uniqueitems)) uniqueitems <- unique(as.vector(unlist(items))) if ( min((max.item - min.item ) > max) || (nlevels(factor(items[[ 1]])) > max) || #changed to [[ ]] following suggestion from Mikko Ronkko length(uniqueitems) > max) { frequency <- NULL message("Number of categories should be increased in order to count frequencies. ") } else { #just describe those items that have less than max categories n.var <- NCOL(items) #changed to handle case of single variable n.cases <- NROW(items) if(n.var < 2 ) message("Number of categories should be increased in order to count frequencies. ") dummy <- matrix(rep(uniqueitems, n.var), ncol = n.var) colnames(dummy) <- names(items) xdum <- rbind(items, dummy) frequency <- apply(xdum, 2, table) frequency <- t(frequency - 1) responses <- rowSums(frequency) frequency <- frequency/responses miss <- 1 - responses/n.cases frequency <- cbind(frequency, miss) # class(frequency) <- cs(psych,frequency) } return(frequency) } #version of Sept 19, 2007 #revised Sept 3, 2010 to count missing responses #revised Sept 7, 2010 to correctly handle missing data in terms of finding alpha and correlation #revised April 3, 2011 to incorporate very nice suggestion by Joshua Wiley to handle unique categories #revised August 4, 2012 to allow the specification of unique items-- useful for irt.responses #revised July 8, 2020 to use psych print function and added the camel case version. Also to process just those items that have <= max responsespsych/R/cortest.mat.R0000644000176200001440000000333611157336472014205 0ustar liggesusers"cortest.mat" <- function(R1,R2=NULL,n1=NULL,n2 = NULL) { cl <- match.call() p <- dim(R1)[2] if(dim(R1)[1] != p) { n1 <- dim(R1)[1] R1 <- cor(R1,use="pairwise") warning ("R1 matrix was not square, correlations found") } if(!is.matrix(R1)) R1 <- as.matrix(R1) #in case R1 is a data.frame if (is.null(n1)) {warning("n1 not specified, set to 100") n1 <-100} if (is.null(R2)) {message("Bartlett's test of is R = I") detR1 <- det(R1) chi2 <- -log(detR1) *(n1 -1 - (2*p + 5)/6) df <- p * (p-1)/2 pval <- pchisq(chi2,df,lower.tail=FALSE) n.obs <- n1 } else { if(dim(R2)[1] != dim(R2)[2] ) {n2 <- dim(R2)[1] R2 <- cor(R2,use="pairwise") warning ("R2 matrix was not square, correlations found") } if (p != dim(R2)[2]) stop("correlation matrices R1 and R2 must be of the same size!") if(!is.matrix(R2)) R2 <- as.matrix(R2) #in case R1 is a data.frame R1.inv <- solve(R1) #inverse of R R2.inv <- solve(R2) #inverse of R R.inv.2 <- R1.inv %*% R2 #inverse of R1 times R2 R.inv.1 <- R2.inv %*% R1 #inverse of R2 times R1 E1 <- .5*(sum((diag(R.inv.2))) -log(det(R.inv.2)) - p ) #likelihood E2 <- .5*(sum((diag(R.inv.1))) -log(det(R.inv.1)) - p ) #likelihood df1 <- p * (p-1)/2 df <- 2*df1 if (is.null(n2)) {n2 <- n1} n.obs <- min(n1,n2) chi21 <- E1 * (n1-1-(2*p - 5)/6) chi22 <- E2 * (n2-1-(2*p - 5)/6) chi2 <- chi21 + chi22} results <- list(chi2 =chi2,prob =pchisq(chi2,df,lower.tail=FALSE), df= df,n.obs=n.obs,Call =cl) class(results) <- c("psych","cortest") return(results) } psych/R/cor.plot.R0000644000176200001440000002001514464017522013467 0ustar liggesusers#developed April 24, 2009 #modifed November 14, 2009 to add legends #completely revised June 20, 2011 to do more reds for less than 0, blues for above 0 #also switched to using layout to make legend clearer #modified May 5, 2012 to add the keep.par option to allow more control of small graphics #Corrected feb 22, 2014 to not double plot text (reported by David Condon) #modified May 12 to allow for selection #modified March 28, 2016 to add the upper option #modified Feb 4, 2017 to label plots and to find correlations by default #modified April 15, 2017 to allow for non-symmetric matrices #modified April 15, 2017 to allow more plotting control on the x and y rotation options #modified November 29th, 2017 to allow for semi-transparency by adjusting alpha #Finally changed the default to be numbers=TRUE (9/17/19) #Added the sort option 6/13/23 #Added the ability to rotate x labels xsrt if xlas =0 #fixed the stars option for the case of lower diagonal so that it works #Added the n.obs paramter for people who like stars for correlation input 8/6/23 "cor.plot" <- "corPlot" <- function(r,numbers=TRUE,colors=TRUE, n=51,main=NULL,zlim=c(-1,1),show.legend=TRUE, labels=NULL, n.legend=10,keep.par=TRUE,select=NULL, pval=NULL,digits=2, trailing=TRUE, cuts=c(.001,.01),scale=TRUE,cex,MAR,upper=TRUE, diag=TRUE,symmetric=TRUE,stars=FALSE,adjust="holm", xaxis =1, xlas=0,ylas=2,ysrt=0, xsrt=0, gr=NULL,alpha =.75, min.length=NULL, sort=FALSE, n.obs=NULL, ...){ if(keep.par) op <- par(no.readonly=TRUE) if(missing(MAR)) MAR <- 5 if(!is.matrix(r) & (!is.data.frame(r))) {if((length(class(r)) > 1) & (inherits(r, "psych"))) { switch(class(r)[2], omega = {r <- r$schmid$sl nff <- ncol(r) r <- r[,1:(nff-3)] if(is.null(main)) {main <- "Omega plot" }}, cor.ci ={ pval <- 2*(1-r$ptci) r <- r$rho}, fa = {r <- r$loadings if(is.null(main)) {main <- "Factor Loadings plot" }}, pc = {r <- r$loadings if(is.null(main)) {main <- "PCA Loadings plot" } }, principal = {r <- r$loadings if(is.null(main)) {main <- "PCA Loadings plot" }} ) #end switch } } else { if(symmetric & !isCorrelation(r) & (nrow(r) !=ncol(r))) { cp <- corr.test(r,adjust=adjust) #find the corelations r <- cp$r pval <- cp$p if(is.null(main)) {main <- "Correlation plot from data" } } else {scale<- FALSE if(is.null(main)) {main <- "Correlation plot"} } if(isCorrelation(r)& !is.null(n.obs) ) pval <- corr.p(r,n.obs)$p } R <- r <- as.matrix(r) if(!is.null(select)) r <- r[select,select] if(sort) {ord <- iclust(r,plot=FALSE)$ord r <- r[ord,ord]} if(min(dim(r)) < 2) {stop ("You need at least two dimensions to make a meaningful plot")} if(is.null(n)) {n <- dim(r)[2]} nf <- dim(r)[2] nvar <- dim(r)[1] if(!upper) r[col (r) > row(r) ] <- NA #blank out the upper diagonal if(!diag) r[col(r) == row(r)] <- NA #and the diagonal if(nf == nvar) r <- t(r) # flip the matrix because grid requires it but don't flip if a loadings matrix if(missing(pval)|is.null(pval)) {pval <- matrix(rep(1,nvar*nf),nvar)} else {if (length(pval) != nvar*nf) { pr = matrix(0,nvar,nf) pr [row(pr) > col(pr)] <- pval # pr[pval[pr]] <- pval pr <- pr + t(pr) diag(pr) <- 0 pval <- pr} if(!stars) {pval <- con2cat(pval,cuts=cuts) pval <- (length(cuts)+1-pval)/length(cuts)} pval <- t(pval) #flip these so that the lower off diagonal will be unadjusted } if(is.null(labels)) { if(is.null(rownames(r))) rownames(r) <- paste("V",1:nvar) if(is.null(colnames(r))) colnames(r) <- paste("V",1:nf) } else {rownames(r) <- colnames(r) <- labels} if(!is.null(min.length)) { rownames(r) <- abbreviate(rownames(r),minlength = min.length) colnames(r) <- abbreviate(colnames(r),minlength = min.length) } max.len <- max(nchar(rownames(r)))/6 #max.len <- max( strwidth(rownames(r))) if(is.null(zlim)) {zlim <- range(r)} if(colors) { if(missing(gr)) {gr <- colorRampPalette(c("red","white","blue"))} #added June 20, 2018? if(max(r,na.rm=TRUE) > 1) {#add a fudge to make all the big ones the same maxr <- max(r) n1 <- n*(zlim[2]- zlim[1])/(maxr- zlim[1]) colramp <- rep(NA,n) n1 <- ceiling(n1) colramp[1:(n1+1)] <- gr(n1+1) colramp[(n1+1):n] <- colramp[n1+1] zlim[2] <- maxr } else { colramp <- gr(n)} } else { colramp <- grey((n:0)/n)} colramp <- adjustcolor(colramp,alpha.f =alpha) if(nvar != nf) { r <- t(r) } #if(!is.null(select)) {r <- r[select,select] # pval <- pval[select,select] # nvar <- length(select) # } #reverse the order of the columns (if square) ord1 <- seq(nvar,1,-1) if(nf == nvar) {r <- r[,ord1] pval <- pval[,ord1]} else {r <- r[,ord1] pval <- t(pval[ord1,])} #reorder the columns to allow image to work #MAR <- 5 par(mar = c(MAR +max.len,MAR+max.len, 4, .5)) if(show.legend) { #set it up to do two plots layout(matrix(c(1,2),nrow=1),widths=c(.9,.1),heights=c(1,1)) } image(r,col=colramp,axes=FALSE,main=main,zlim=zlim) box() #if(nf < nvar) { at1 <- (0:(nf-1))/(nf-1) at2 <- (0:(nvar-1)) /(nvar-1) lab1 <- rownames(r) lab2 <- colnames(r) # } else { # at1 <- (0:(nf-1)) /(nf-1) # at2 <- (0:(nvar-1)) /(nvar-1) # lab1 <- rownames(r) # lab2 <- colnames(r) # } #if(nvar != nf) { r <- t(r) } if(xaxis == 3) {line <- -.5 tick <- FALSE} else {line <- NA tick <- TRUE} if(max.len >.5) {axis(2,at=at2,labels=lab2,las=ylas,...) if(xsrt==0) { axis(xaxis,at=at1,labels=lab1,las=xlas,line=line,tick=tick,...)} else { axis(xaxis,at=at1,labels=FALSE,las=xlas,line=line,tick=tick,...) text(at1, par("usr")[3]*3, labels = lab1, srt = xsrt, xpd=TRUE,...) }} else { axis(2,at=at2,labels=lab2,las=ylas,...) if(xsrt==0) { axis(xaxis,at=at1,labels=lab1,las=xlas,line=line,tick=tick,...)} else { axis(xaxis,at=at1,labels=FALSE,las=xlas,line=line,tick=tick,...) text(at1, par("usr")[3]*3, labels = lab1, srt = xsrt, xpd=TRUE,...) } } #at1 <- (0:(nf-1))/(nf-1) if(numbers) {rx <- rep(at1,ncol(r)) ry <-rep(at2,each=nrow(r)) # rv <- round(r*100) rv <- round(r,digits) if(trailing) {rv <- sprintf("%.2f", rv)# format with trailing 0rv rv[rv=="NA"] <- "" #drop the NAs for lower Cors } if(stars) {#pval <- corr.p1(r,npairs,"none" symp <- symnum(pval, corr = FALSE,cutpoints = c(0, .001,.01,.05, 1), symbols = c("***","**","*"," "),legend=FALSE) rv[(!is.na(rv))] <- paste0(rv[(!is.na(rv))],symp[!is.na(rv)]) rv <- matrix(rv,ncol=ncol(pval)) if(!upper) rv[ncol(rv)+diag - row(rv) < col(rv)] <- "" if(missing(cex)) cex = 9/max(nrow(r),ncol(r)) text(rx,ry,rv,cex=cex,...) } else { if(missing(cex)) cex = 9/max(nrow(r),ncol(r)) if(scale) { text(rx,ry,rv,cex=pval*cex,...) } else { text(rx,ry,rv,cex= 1.5*cex,...) }} } if(show.legend) { leg <- matrix(seq(from=zlim[1],to=zlim[2],by =(zlim[2] - zlim[1])/n),nrow=1) #screen(2) par(mar=c(MAR,0, 4,3)) image(leg,col=colramp,axes=FALSE,zlim=zlim) at2 <- seq(0,1,1/n.legend) labels =seq(zlim[1],zlim[2],(zlim[2]-zlim[1])/(length(at2)-1)) axis(4,at=at2,labels =labels,las=2,...) } if(keep.par) par(op) #return the parameters to where we started invisible(R) #added 11/26/18 } #used to find p values for corPlot #just an internal function #notice that the matrix is flipped to do the plotting #we need to find the lowerright "corr.p1" <- function(r,n,adjust="holm") { t <- (r*sqrt(n-2))/sqrt(1-r^2) p <- 2*(1 - pt(abs(t),(n-2))) p[p>1] <- 1 if (adjust !="none") { p[] <- p.adjust(p ,adjust) #the case of an asymmetric matrix } result <- p return(result) } "reflect" <- function(m) { NR <- NROW(m) NC <- NCOL(m) m[NR+1 - row(m)] <- m[row(m)] m[NC +1 - col(m)] <- m[col(m)]}psych/R/paired.r.R0000644000176200001440000000175613241364234013443 0ustar liggesusers"paired.r" <- function(xy,xz,yz=NULL,n,n2=NULL,twotailed=TRUE) { cl <- match.call() if (!is.null(yz)) { diff <- xy-xz determin=1-xy*xy - xz*xz - yz*yz + 2*xy*xz*yz av=(xy+xz)/2 cube= (1-yz)*(1-yz)*(1-yz) t2 = diff * sqrt((n-1)*(1+yz)/(((2*(n-1)/(n-3))*determin+av*av*cube))) p <- pt(abs(t2),n-3,lower.tail=FALSE) #changed to n-3 12/15/18 if(twotailed) p <- 2*p value <- list(test="test of difference between two correlated correlations",t=t2,p=p,Call=cl) } else { xy.z <- 0.5*log((1+xy)/(1-xy)) xz.z <- 0.5*log((1+xz)/(1-xz)) if(is.null(n2)) n2 <- n se.diff.r <- sqrt(1/(n-3) + 1/(n2-3)) diff <- xy.z - xz.z z <- abs(diff/se.diff.r) p <- (1-pnorm(z )) if(twotailed) p <- 2*p value <- list(test="test of difference between two independent correlations",z=z,p=p,Call=cl) } class(value) <- c("psych","paired.r") return(value) } psych/R/guttman.R0000644000176200001440000001130712247407205013410 0ustar liggesusers#revised December 2, 2013 to take advantage of the splitHalf function "guttman" <- function(r,key=NULL) { cl <- match.call() .Deprecated("splitHalf",msg="Guttman has been deprecated. The use of the splitHalf function is recommended") nvar <- dim(r)[2] if(dim(r)[1] != dim(r)[2]) {r <- cor(r,use="pairwise")} else { if(!is.matrix(r)) r <- as.matrix(r) r <- cov2cor(r)} #make sure it is a correlation matrix not a covariance or data matrix if(is.null(colnames(r))) { rownames(r) <- colnames(r) <- paste("V",1:nvar,sep="") } if (!is.null(key)) { key <- as.vector(key) r <- diag(key) %*% r %*% diag(key) flip <- FALSE #we do this if we specify the key } else {key <- rep(1,nvar) } m <- (1-r)/2 diag(m) <- 1 m.names <- colnames(r) colnames(m) <- m.names #flip items if we choose to do so signkey <- strtrim(key,1) signkey[signkey=="1"] <- "" m.names <- paste(m.names,signkey,sep="") colnames(m) <- rownames(m) <- m.names if(nvar < 3) {message("These estimates are not really meaningful if you have less than 3 items, \n Try running the alpha function instead") stop()} # beta <- ICLUST(r,1,plot=FALSE)$beta # worst <- ICLUST(r,2,plot=FALSE) # w.keys <- worst$p.sorted$cluster #the following was a crude attempt at finding the best #this has been replaced with calling splitHalf best <- splitHalf(r) # best <- ICLUST(m,2,plot=FALSE,SMC=FALSE) #best <- ICLUST(m,2,plot=FALSE) #keys <- matrix(rep(0,nvar*2),ncol=2) #b.keys <- best$p.sorted$cluster # m1 <- r #diag(m1) <- 0 # best.kmeans <- kmeans(m,2,nstart=10) #keys.kmean <- matrix(rep(0,nvar*2),ncol=2) # for(i in 1:nvar) { # keys.kmean[i,best.kmeans$cluster[i]] <- 1 } f1 <- fa(r,SMC=FALSE) #one factor solution load <- f1$loadings ord.load <- order(load) key.fa <- matrix(rep(0,nvar*2),ncol=2) for (i in 1:nvar) { key.fa[ord.load[i],1] <- i %% 2 key.fa[ord.load[i],2] <- 1 - key.fa[ord.load[i],1] } f2 <- fa(r,2,SMC=FALSE) #two factor solution load <- f2$loadings key.fa2 <- matrix(rep(0,nvar*2),ncol=2) key.fa2[,1] <- (abs(load[,1]) > abs(load[,2])) + 0 key.fa2[,2 ] <- 1- key.fa2[,1] ev <-eigen(r)$values e <- ev[1] alpha.pc <- (1-1/e) * nvar/(nvar-1) #alpha.pc2 <- (1-1/ev[2]) * nvar/(nvar-1) r.pc <- 2*ev[1]/(ev[1]+ev[2])-1 r.pc <- r.pc * alpha.pc #attenuate the correlation beta.pc <- 2 * r.pc/(1+r.pc) Vt <- sum.r <- sum(r) tr.r <- tr(r) lambda.1 <- 1 - tr.r/Vt off <- r diag(off) <- 0 sum.off <- sum(off) sumsq.off <- sum(off^2) lambda.2 <- (sum.off+ sqrt(sumsq.off*nvar/(nvar-1)))/Vt lambda.3 <- nvar * lambda.1/(nvar-1) sum.smc <- sum(smc(r)) lambda.6 <-(sum.r+sum.smc-sum(diag(r)))/Vt c.co <- colSums(r^2)-diag(r^2) c.co.max <- max(c.co) lambda.5 <- lambda.1 + 2*sqrt(c.co.max)/Vt lambda.5p <- lambda.1 +(nvar)/(nvar-1)* 2*sqrt(c.co.max)/Vt # #this next section is a complete kludge meant to find the most similar splits #a better way is to use the glb function of Andreas Moeltner #revised February 11 to implement equation 51 of Guttman, not 51' # #all of this has been deleted as of December, 2013 to just us splitHalf #keys <- cbind(w.keys,b.keys,keys.kmean,key.fa,key.fa2) #try(colnames(keys) <- c("IC1","IC2","ICr1","ICr2","K1","K2","F1","F2","f1","f2")) #covar <- t(keys) %*% r %*% keys #matrix algebra is our friend # var <- diag(covar) # sd.inv <- 1/sqrt(var) # ident.sd <- diag(sd.inv,ncol = length(sd.inv)) # cluster.correl <- ident.sd %*% covar %*% ident.sd #beta <- abs(cluster.correl[2,1]) *2 /(1+abs(cluster.correl[2,1])) #worst split # beta <- 2 * (1-2/(2+abs(cluster.correl[2,1]))) #glb1 <- cluster.correl[3,4] *2 /(1+cluster.correl[3,4]) # glb2 <- cluster.correl[5,6] * 2/(1+ cluster.correl[5,6] ) # glb3 <- cluster.correl[7,8] * 2/(1+cluster.correl[7,8]) #Vtcl1 <- covar[3,3]+ covar[4,4] + 2 * covar[3,4] #Vtcl2 <- covar[5,5]+ covar[6,6] + 2 * covar[5,6] #Vtcl3 <- covar[7,7]+covar[8,8] + 2 * covar[7,8] #glbIC <- 2*(1-(covar[3,3]+ covar[4,4])/Vtcl1 ) #glb2 <- 2*(1-(covar[5,5]+ covar[6,6])/Vtcl2 ) #glb3 <- 2*(1-(covar[7,7]+ covar[8,8])/Vtcl3 ) #beta.fa <- cluster.correl[9,10] * 2/(1+cluster.correl[9,10]) # glb.max <- max(glbIC,glb2,glb3) sum.smc <- sum(smc(r)) glb <- glb.fa(r)$glb beta <- best$minrb if(beta < 0) beta <- 0 gamma <- (sum.r+sum.smc-sum(diag(r)))/Vt tenberg <- tenberge(r) result <- list(lambda.1=lambda.1,lambda.2=lambda.2,lambda.3=lambda.3,lambda.4 =best$maxrb,lambda.5 = lambda.5,lambda.5p = lambda.5p,lambda.6=lambda.6,alpha.pc = alpha.pc, glb=glb, tenberge=tenberg,r.pc=r.pc,beta.pc=beta.pc,beta=beta,Call=cl) class(result) <- c("psych","guttman") return(result) } psych/R/print.psych.omega.R0000644000176200001440000002207214444434602015304 0ustar liggesusers"print_psych.omega" <- function(x,digits=2,all=FALSE,cut=NULL,sort=FALSE,...) { xx <- x if(!is.null(x$ci)) { x <- x$om} if(is.null(cut)) cut <- .2 cat( x$title,"\n") cat("Call: ") print(x$call) cat("Alpha: ",round(x$alpha,digits),"\n") cat("G.6: ",round(x$G6,digits),"\n") cat("Omega Hierarchical: " ,round(x$omega_h,digits),"\n") cat("Omega H asymptotic: " ,round(x$omega.lim,digits),"\n") cat("Omega Total " ,round(x$omega.tot,digits),"\n") cat("\nSchmid Leiman Factor loadings greater than ",cut, "\n") loads <- x$schmid$sl nfactor <- ncol(loads)-3 if(sort) { ord <- sort(abs(loads[,1]),decreasing=TRUE,index.return=TRUE) loads[,] <- loads[ord$ix,] rownames(loads) <- rownames(loads)[ord$ix] loads <- cbind(v=ord$ix,loads) } #end sort tn <- colnames(loads) loads <- data.frame(loads) colnames(loads) <- tn #this seems weird, but otherwise we lose the F* name if(sort) {loads[,1] <- as.integer(loads[,1]) load.2 <- loads[,2:(nfactor+1)]} else {load.2 <- loads[,1:nfactor] } h2 <- round(loads[,"h2"],digits) u2 <- round(loads[,"u2"],digits) loads <-round(loads,digits) fx <- format(loads,digits=digits) nc <- nchar(fx[1,3], type = "c") fx[abs(loads)< cut] <- paste(rep(" ", nc), collapse = "") p2 <- loads[,"p2"] mp2 <- mean(p2) vp2 <- var(p2) p2 <- round(p2,digits) print(cbind(fx[,1:(nfactor+sort)],h2,u2,p2),quote="FALSE") numfactors <- dim(x$schmid$sl)[2] -3 eigenvalues <- diag(t(x$schmid$sl[,1:numfactors]) %*% x$schmid$sl[,1:numfactors]) cat("\nWith Sums of squares of:\n") ev.rnd <- round(eigenvalues,digits) print(ev.rnd,digits=digits) maxmin <- max(eigenvalues[2:numfactors])/min(eigenvalues[2:numfactors]) gmax <- eigenvalues[1]/max(eigenvalues[2:numfactors]) cat("\ngeneral/max " ,round(gmax,digits)," max/min = ",round(maxmin,digits)) cat("\nmean percent general = ",round(mp2,digits), " with sd = ", round(sqrt(vp2),digits), "and cv of ",round(sqrt(vp2)/mp2,digits),"\n") if(!is.null(x$ECV)) cat("Explained Common Variance of the general factor = ", round(x$ECV,digits),"\n") if(!is.null(x$schmid$dof)) {cat("\nThe degrees of freedom are",x$schmid$dof," and the fit is ",round(x$schmid$objective,digits),"\n") if(!is.null(x$schmid$n.obs)&&!is.na(x$schmid$n.obs)) {cat("The number of observations was ",x$schmid$n.obs, " with Chi Square = ",round(x$schmid$STATISTIC,digits), " with prob < ", signif(x$schmid$PVAL,digits))} } if(!is.null(x$schmid$rms)) {cat("\nThe root mean square of the residuals is ", round(x$schmid$rms,digits),"\n") } if(!is.null(x$schmid$crms)) {cat("The df corrected root mean square of the residuals is ", round(x$schmid$crms,digits)) } if(!is.null(x$schmid$RMSEA)) {cat("\nRMSEA index = ",round(x$schmid$RMSEA[1],digits+1), " and the", (1- x$schmid$RMSEA[4])*100,"% confidence intervals are ",round(x$schmid$RMSEA[2:3],digits+1)) } if(!is.null(x$schmid$BIC)) {cat("\nBIC = ",round(x$schmid$BIC,digits))} cat("\n\nCompare this with the adequacy of just a general factor and no group factors") if(!is.null(x$gstats$dof)) {cat("\nThe degrees of freedom for just the general factor are",x$gstats$dof," and the fit is ",round(x$gstats$objective,digits),"\n") if(!is.null(x$gstats$n.obs)&&!is.na(x$gstats$n.obs)) {cat("The number of observations was ",x$gstats$n.obs, " with Chi Square = ",round(x$gstats$STATISTIC,digits), " with prob < ", signif(x$gstats$PVAL,digits))} } if(!is.null(x$gstats$rms)) {cat("\nThe root mean square of the residuals is ", round(x$gstats$rms,digits),"\n") } if(!is.null(x$gstats$crms)) {cat("The df corrected root mean square of the residuals is ", round(x$gstats$crms,digits),"\n") } if(!is.null(x$gstats$RMSEA)) {cat("\nRMSEA index = ",round(x$gstats$RMSEA[1],digits+1), " and the", (1- x$gstats$RMSEA[4])*100,"% confidence intervals are ",round(x$gstats$RMSEA[2:3],digits+1)) } if(!is.null(x$gstats$BIC)) {cat("\nBIC = ",round(x$gstats$BIC,digits),"\n")} stats.df <- t(data.frame(sqrt(x$stats$R2),x$stats$R2,2*x$stats$R2 -1)) cat("\nMeasures of factor score adequacy \n") rownames(stats.df) <- c("Correlation of scores with factors ","Multiple R square of scores with factors ","Minimum correlation of factor score estimates") print(round(stats.df,digits)) #cat("\nMeasures of factor score adequacy ",names(eigenvalues)) #cat("\nCorrelation of scores with factors ",round(sqrt(x$stats$R2),digits)) #cat("\nMultiple R square of scores with factors " ,round(x$stats$R2,digits)) #cat("\nMinimum correlation of factor score estimates ", round(2*x$stats$R2 -1,digits),"\n") cat("\n Total, General and Subset omega for each subset\n") colnames(x$omega.group) <- c("Omega total for total scores and subscales","Omega general for total scores and subscales ", "Omega group for total scores and subscales") print(round(t(x$omega.group),digits)) #now, see if there are any confidence intervals to report if(!is.null(xx$ci)) { cat("\n Estimates and bootstrapped confidence intervals\n") li <- data.frame(lower=xx$ci$ci[,1],estimate=xx$ci$means,upper=xx$ci$ci[,2]) li[1,2] <- x$omega_h li[2,2] <- x$alpha li[3,2] <- x$omega.tot li[4,2] <- x$G6 li[5,2] <- x$omega.lim print(li,digits=digits)} } "print_psych.omegaSem" <- function(x,digits=2,all=FALSE,cut=NULL,sort=FALSE,...) { if(is.null(cut)) cut <- .2 cat( x$title,"\n") if(!is.null(x$Call)) {# we have run this from omegaSem so we should print omega results first cat("Call: ") print(x$Call) print_psych.omega(x$omegaSem,digits=digits,all=all,cut=cut,sort=sort,...) x <- x$omega.efa } loads <- x$cfa.loads class(loads) <- NULL nfactor <- ncol(loads) cat("\n The following analyses were done using the ", x$sem," package \n") if(nfactor > 1) { cat("\n Omega Hierarchical from a confirmatory model using sem = ", round(x$omega,digits)) } else { cat("\n With only 1 factor specified in the sem model, we can only calculate omega Total.\n You should probably rerun the sem specifying either a bifactor or hierarchical model.\n") } cat("\n Omega Total from a confirmatory model using sem = ", round(x$omega.tot,digits),"\n") cat("With loadings of \n") loads <- data.frame(loads) if(nfactor > 1) { tn <- c("g", paste0("F",1:(nfactor-1),"*")) colnames(loads) <- tn } #this seems weird, but otherwise we lose the F* name } load.2 <- as.matrix(loads) h2 <- round(rowSums(load.2^2),digits) loads <- round(loads,digits) fx <- format(loads,digits=digits) if(nfactor > 1 ) { nc <- nchar(fx[1,3], type = "c") fx[abs(loads)< cut] <- paste(rep(" ", nc), collapse = "")} h2 <- round(rowSums(load.2^2),digits) u2 <- 1 - h2 p2 <- loads[,1]^2/h2 mp2 <- mean(p2) vp2 <- var(p2) p2 <- round(p2,digits) print(cbind(fx,h2,u2,p2),quote="FALSE") loads <- as.matrix(load.2) eigenvalues <- diag(t(loads) %*% loads) cat("\nWith sum of squared loadings of:\n") ev.rnd <- round(eigenvalues,digits) print(ev.rnd,digits=digits) maxmin <- max(eigenvalues[2:nfactor])/min(eigenvalues[2:nfactor]) gmax <- eigenvalues[1]/max(eigenvalues[2:nfactor]) ECV <- eigenvalues[1]/sum(eigenvalues) if(!is.null(x$Fit)) { cat("\nThe degrees of freedom of the confirmatory model are ",x$Fit[[1]]$df, " and the fit is ", x$Fit[[1]]$stat, " with p = ",x$Fit[[1]]$pvalue) } cat("\ngeneral/max " ,round(gmax,digits)," max/min = ",round(maxmin,digits)) cat("\nmean percent general = ",round(mp2,digits), " with sd = ", round(sqrt(vp2),digits), "and cv of ",round(sqrt(vp2)/mp2,digits),"\n") cat("Explained Common Variance of the general factor = ", round(ECV,digits),"\n") if(nfactor > 1) { cat("\nMeasures of factor score adequacy \n") # rownames(stats.df) <- c("Correlation of scores with factors ","Multiple R square of scores with factors ","Minimum correlation of factor score estimates") fsa.df <- t(data.frame(sqrt(x$gR2),x$gR2,2*x$gR2 -1)) rownames(fsa.df) <- c("Correlation of scores with factors ","Multiple R square of scores with factors ","Minimum correlation of factor score estimates") colnames(fsa.df) <- tn print(round(fsa.df,digits)) cat("\n Total, General and Subset omega for each subset\n") colnames(x$omega.group) <- c("Omega total for total scores and subscales","Omega general for total scores and subscales ", "Omega group for total scores and subscales") rownames(x$omega.group) <- tn print(round(t(x$omega.group),digits)) } cat("\nTo get the standard sem fit statistics, ask for summary on the fitted object") } psych/R/fa.sort.R0000644000176200001440000001452513573034307013314 0ustar liggesusers#modified Sept 5, 2016 to sort Structure as well as loadings "fa.sort" <- function(fa.results,polar=FALSE) { omega <- FALSE con.i <- FALSE fa.ci <- extension <-extend <- NA #put in to avoid being identified as not defined. Seems nuts Structure <- NULL #in case we are not doing fa # if(length(class(fa.results)) > 1) { value <- class(fa.results)[2] } else {value="other"} #This next section was added December 7, 2019 to change from class(x)[2] to inherits(x, ...) if(length(class(fa.results)) > 1) { names <- cs(omega,omegaSem, fa.ci, iclust, fa, principal, extension, extend) value <- inherits(fa.results,names,which=TRUE) # value <- class(x)[2] if(any(value > 1) ) { value <- names[which(value > 0)]} else {value <- "other"} } else {value <- "other"} switch(value, omega = { omega <- TRUE omegaSem <- FALSE factors <- as.matrix(fa.results$schmid$oblique) sl <- fa.results$schmid$sl}, omegaSem = {omega=TRUE omegaSem <- TRUE factors <- as.matrix(fa.results) sl <- as.matrix(fa.results) }, fa.ci = {factors <- fa.results$loadings if(!is.null(fa.results$Phi)) {Phi <-fa.results$Phi } con.i <- TRUE ci <- fa.results$cis$ci cip <- fa.results$cis$p }, iclust = {factors <- as.matrix(fa.results$loadings) if(!is.null(fa.results$Phi)) {Phi <- fa.results$Phi}}, fa = {factors <- as.matrix(fa.results$loadings) if(!is.null(fa.results$Phi)) {Phi <- fa.results$Phi} Structure <- fa.results$Structure}, principal = {factors <- as.matrix(fa.results$loadings) if(!is.null(fa.results$Phi)) {Phi <- fa.results$Phi}}, extension = {factors <- as.matrix(fa.results$loadings) if(!is.null(fa.results$Phi)) {Phi <- fa.results$Phi}}, extend = {factors <- as.matrix(fa.results$loadings) if(!is.null(fa.results$Structure)) Structure <- fa.results$Structure if(!is.null(fa.results$Phi)) {Phi <- fa.results$Phi}}, other = {factors <- fa.results}) #now we have found the factor loadings from the various possibilities nitems <- dim(factors)[1] nfactors <- dim(factors)[2] total.ord <- rep(NA,nitems) if(polar) { pol.ord <- polar(factors)[,1] factors[1:nitems,] <- factors[pol.ord,] rownames(factors)[1:nitems] <- rownames(factors)[pol.ord ] } else { if(is.null(rownames(factors))) {rownames(factors) <- paste("V",1:nitems)} loads <- data.frame(item=seq(1:nitems),cluster=rep(0,nitems)) #first sort them into clusters #first find the maximum for each row and assign it to that cluster loads$cluster <- apply(abs(factors),1,which.max) ord <- sort(loads$cluster,index.return=TRUE) factors[1:nitems,] <- factors[ord$ix,] if(!is.null(Structure)) {Structure[1:nitems,] <- Structure[ord$ix,] rownames(Structure)[1:nitems] <- rownames(Structure)[ord$ix] } rownames(factors)[1:nitems] <- rownames(factors)[ord$ix] total.ord <- ord$ix if(con.i) { ci[1:nitems,] <- ci[ord$ix,] #if we are doing confidence intervals cip[1:nitems,] <- cip[ord$ix,] } #now sort column wise #now sort the loadings that have their highest loading on each cluster items <- table(loads$cluster) #how many items are in each cluster? first <- 1 item <- loads$item for (i in 1:length(items)) { if(items[i] > 0 ) { last <- first + items[i]- 1 ord <- sort(abs(factors[first:last,i]),decreasing=TRUE,index.return=TRUE) factors[first:last,] <- factors[item[ord$ix+first-1],] loads[first:last,1] <- item[ord$ix+first-1] if(!is.null(Structure) ) {Structure[first:last,] <- Structure[item[ord$ix+first-1],] rownames(Structure)[first:last] <- rownames(Structure)[ord$ix+first-1] } rownames(factors)[first:last] <- rownames(factors)[ord$ix+first-1] if(con.i) { ci[first:last,] <- ci[item[ord$ix+first-1],] #if we are doing confidence intervals cip[first:last,] <- cip[item[ord$ix+first-1],] } total.ord[first:last] <- total.ord[ord$ix+first-1 ] first <- first + items[i] } } } if(omega) {if(!omegaSem) fa.results$schmid$oblique <- factors #if the input was from omega, then sort the schmid leiman solution as well loads <- data.frame(item=seq(1:nitems),cluster=rep(0,nitems)) nfactors <- dim(sl)[2]-4 #g, h2, u2, p2 if(omegaSem) nfactors <- NCOL(sl) -1 if(nfactors > 1) { loads$cluster <- apply(abs(sl[,2:(nfactors+1)]),1,which.max) +1} else {loads$cluster <- rep(1,nitems) } ord <- sort(loads$cluster,index.return=TRUE) sl[1:nitems,] <- sl[ord$ix,] rownames(sl)[1:nitems] <- rownames(sl)[ord$ix] items <- table(loads$cluster) #how many items are in each cluster? first <- 1 item <- loads$item for (i in 1:length(items)) { if(items[i] > 0 ) { last <- first + items[i]- 1 ord <- sort(abs(sl[first:last,i+1]),decreasing=TRUE,index.return=TRUE) sl[first:last,] <- sl[item[ord$ix+first-1],] loads[first:last,1] <- item[ord$ix+first-1] rownames(sl)[first:last] <- rownames(sl)[ord$ix+first-1] first <- first + items[i] } } if(omegaSem) {fa.results <- sl } else { fa.results$schmid$sl <- sl} } else { if((!is.matrix(fa.results)) && (!is.data.frame(fa.results))) {fa.results$loadings <- factors if(con.i) { rownames(ci) <- rownames(factors) fa.results$ci <- ci rownames(cip) <- rownames(factors) colnames(cip) <- colnames(factors) fa.results$cip <- cip} } else { fa.results <- factors} } #note that h2 and complexities were not sorted, we need to do this now if(is.list(fa.results)) {fa.results$order <- total.ord fa.results$complexity <- fa.results$complexity[total.ord] fa.results$communality <- fa.results$communality[total.ord] fa.results$uniquenesses <- fa.results$uniquenesses[total.ord] if(!is.null(Structure)) { fa.results$Structure <- Structure} } return(fa.results) } psych/R/cluster.loadings.R0000644000176200001440000000633013571766716015231 0ustar liggesuserscluster.loadings <- function (keys, r.mat, correct = TRUE,SMC=TRUE) { cl <- match.call() if (!is.matrix(keys)) { keys <- as.matrix(keys)} r.mat[is.na(r.mat)] <- -9999999 item.sd <- sqrt(diag(r.mat)) item.covar <- r.mat %*% keys #item by cluster covariances covar <- t(keys) %*% item.covar #variance/covariance of clusters var <- diag(covar) sd.inv <- 1/sqrt(var) #items corrected for communality lead to the Guttman G6 estimate if(SMC) {r.smc <- smc(r.mat) r.smc[r.smc < 0 ] <- 1 #for a very weird condition diag(r.mat) <- r.smc } else { diag(r.mat) <- 0 item.max <- apply(r.mat,1,max) diag(r.mat) <- item.max} c.item.covar <- r.mat %*% keys #this uses the communality estimate and thus corrects for item overlap c.covar <- t(keys) %*% c.item.covar c.var <- diag(c.covar) G6 <- c.var/var n.keys <- dim(keys)[2] if(n.keys >1) { c.item.cor <- c.item.covar %*% sqrt(diag(1/c.var))/item.sd } else {c.item.cor <- c.item.covar/sqrt(c.var*item.sd) } key.count <- diag(t(keys) %*% keys) #how many items in each cluster? if (correct) { cluster.correct <- diag((key.count/(key.count - 1))) for (i in 1:dim(keys)[2]) { if (key.count[i]<2 ) { #fix the case of 1 item keys cluster.correct[i,i] <- 1 } else { cluster.correct[i,i] <- key.count[i]/(key.count[i]-1) item.covar[,i] <- item.covar[,i] - keys[,i]} #subtract the variance of the item } #i loop correction.factor <- keys %*% cluster.correct #put back average correlation for the item if it loads on the key correction.factor[ correction.factor < 1] <- 1 item.covar <- item.covar * correction.factor } ident.sd <- diag(sd.inv, ncol = length(sd.inv)) c.correl <- ident.sd %*% covar %*% ident.sd p.loading <- try(c.item.cor %*% solve(c.correl)) if(inherits(p.loading,"try-error")) {message('the correlation matrix was singular, pattern loadings not found, proceed with caution') p.loading <- c.item.cor} c.item.cor[abs(c.item.cor) > 99999] <- NA c.correl[abs(c.correl) > 99999] <- NA key.alpha <- ((var - key.count)/var) * (key.count/(key.count - 1)) key.alpha[is.nan(key.alpha)] <- 1 key.alpha[!is.finite(key.alpha)] <- 1 key.av.r <- key.alpha/(key.count - key.alpha*(key.count-1)) #alpha 1 = average r colnames(c.item.cor) <- colnames(keys) colnames(p.loading) <- colnames(keys) colnames(c.correl) <- colnames(keys) rownames(c.correl) <- colnames(keys) rownames(c.item.cor) <- rownames(r.mat) if( ncol(keys) >1) {cluster.corrected <- correct.cor(c.correl, t(key.alpha))} else {cluster.corrected <- c.correl} results <- list(loadings=c.item.cor,pattern=p.loading, cor=c.correl,corrected=cluster.corrected, sd = sqrt(var), alpha = key.alpha,av.r = key.av.r, size = key.count,G6=G6,Call=cl) class(results) <- c("psych","cluster.loadings") return(results) } psych/R/sim.parallel.r0000644000176200001440000000147113717575414014370 0ustar liggesusers"sim.parallel" <- function(ntrials=10,nvar = c(12,24,36,48),nfact = c(1,2,3,4,6), n = c(200,400)) { nvariables = nvar factors = nfact subjects = n result <- matrix(NaN,ncol=7,nrow=ntrials*length(nvariables) * length(subjects) * length(factors)) k <- 1 for (nfact in factors) { for (nvar in nvariables) { for (nsub in subjects) { for (trials in 1:ntrials) { x <- sim.minor(nvar=nvar,nfact=nfact,n=nsub)$observed fp <- fa.parallel(x) fps <- fa.parallel(x,SMC=TRUE) result[k,1] <- nfact result[k,2] <- nvar result[k,3] <- trials result[k,4] <- fp$nfact result[k,5] <- fps$nfact result[k,6] <- fp$ncomp result[k,7] <- nsub k <- k + 1 } #trials } #subjects }#variables }#factors colnames(result) <- c("factors","nvar","trials","nfact","smc.fact","ncomp","nsub") return(result) } psych/R/glb.algebraic.R0000644000176200001440000000356013625303521014404 0ustar liggesusers#Written by Andreas Moltner with some revisions by William Revelle #March, 2010 "glb.algebraic"<- function(Cov,LoBounds=NULL, UpBounds=NULL) { if(!requireNamespace('Rcsdp')) {stop("Rcsdp must be installed to find the glb.algebraic") } cl<-match.call() # check input p<-dim(Cov)[2] if (dim(Cov)[1] != p) Cov <- cov(Cov) #find the covariances if (any(t(Cov)!=Cov)) stop("'Cov' is not symmetric") if(is.null(LoBounds)) LoBounds <-rep(0,ncol(Cov)) if(is.null(UpBounds)) UpBounds <- diag(Cov) if (any(LoBounds>UpBounds)) { stop("'LoBounds'<='UpBounds' violated") } # if (min(eigen(Cov,symmetric=TRUE,only.values=TRUE)$values)<0) # stop("'Cov' is not positive semidefinite") if (length(LoBounds) != p) stop("length(LoBounds) != dim(Cov)") if (length(UpBounds) != p) stop("length(UpBounds)!=dim(Cov)") Var<-diag(Cov) # objective function opt --> min opt=rep(1,p) # set up csdp input C<-list(diag(Var)-Cov, -UpBounds,LoBounds) A<-vector("list",p) for (i in 1:p) { b<-rep(0,p) b[i]<-1 A[[i]]<-list(diag(b),-b,b) } K<-list(type=c("s","l","l"),size=rep(p,3)) # call csdp #control=csdp.control(printlevel=0) result<- Rcsdp::csdp(C,A,opt,K, control=Rcsdp::csdp.control(printlevel=0)) #result<- Rcsdp::csdp(C,A,opt,K) if (result$status>=4||result$status==2) { warning("Failure of csdp, status of solution=",result$status) lb<-list(glb=NA,solution=NA,status=result$status,Call=cl) } else { if (result$status!=0) { warning("status of solution=",result$status) } # greatest lower bound to reliability item.diag <- result$y names(item.diag) <- colnames(Cov) lb<-list(glb=(sum(Cov)-sum(Var)+sum(result$y))/sum(Cov), solution = item.diag, status=result$status, Call=cl) } return(lb) } psych/R/logistic.R0000644000176200001440000000070011352703632013540 0ustar liggesusers"logistic" <- function(x,d=0, a=1,c=0,z=1) {c + (z-c)/(1+exp(a*(d-x)))} "logit" <- function(p) {log(p/(1-p))} #created March 20, 2010 #graded response model "logistic.grm" <- function(x,d=0,a=1.5,c=0,z=1,r=2,s=c(-1.5,-.5,.5,1.5)){ if (r == 1) {p <- (1-logistic(x,d=s[1],a=a,c=c,z=z))} else { if (r == (length(s)+1)) {p <- logistic(x,d=s[r-1],a=a,c=c,z=z) } else { p <- logistic(x,d=s[r-1],a=a,c=c,z=z) - logistic(x,d=s[r],a=a,c=c,z=z ) }} p}psych/R/smc.R0000644000176200001440000000774114262102455012520 0ustar liggesusers#modified Dec 10, 2008 to return 1 on diagonal if non-invertible #modifed March 20, 2009 to return smcs * variance if covariance matrix is desired #modified April 8, 2009 to remove bug introduced March 10 when using covar from data #modified Jan 14, 2010 to test if matrix before cov2cor call. #modified October 2, 2010 to convert smcs < 0 to 0 -- this is situation encountered with extreme missingness in sapa matrices #modified April 23, 2015 to handle NAs in the correlation matrix #smcs are found for the non-NA variables, then, smcs for the remaining ones are found from the correlations for those with NAs "smc" <- function(R,covar =FALSE) { failed=FALSE wcc <- maxr <- NULL p <- dim(R)[2] if(is.null(colnames(R))) colnames(R) <- paste0("V",1:p) smc.all <- rep(NA,p) names(smc.all) <- colnames(R) if (dim(R)[1] != p) {if(covar) {C <- cov(R, use="pairwise") vari <- diag(C) R <- cov2cor(C) } else {R <- cor(R,use="pairwise")}} else {vari <- diag(R) if (!is.matrix(R)) R <- as.matrix(R) R <- cov2cor(R) } tempR <- NULL #look for Nan if(any(is.na(R))) { bad <- TRUE tempR <- R vr <- diag(tempR) diag(tempR) <- 0 maxr <- apply(tempR,2,function(x) max(abs(x),na.rm=TRUE)) diag(tempR) <- vr wcl <-NULL while(bad) { wc <- table(which(is.na(tempR), arr.ind=TRUE)) #find the correlations that are NA wcl <- c(wcl,as.numeric(names(which(wc==max(wc))))) tempR <- R[-wcl,-wcl] if(any(is.na(tempR))) {bad <- TRUE} else {bad <- FALSE} } warning("Missing values (NAs) in the correlation matrix do not allow for SMC's to be found for all variables. \nI will try to estimate SMCs for those variables by their non-NA correlations.") cat('\nSMC(s) for variables ',colnames(R)[wcl], 'were replaced (if possible) with smcs based upon their (its) non-NA correlations\n') #now, try to find the smcs for the other ones wc <-(which(is.na(R[,wcl]),arr.ind=TRUE)) if(is.null(dim(wc))) {wcc <- as.numeric(names(table(wc))) } else { wcc <- as.numeric(names(table(wc[,1])))} tempR <- R[-wcc,-wcc] R <- R[-wcl,-wcl] } #if(!covar) { R <- cor.smooth(R) } #probably no longer necessary to do this, since we do P.inv # R.inv <- try(solve(R),TRUE) # if(inherits(R.inv, as.character("try-error"))) {smc <- rep(1,p) # message("In smc, the correlation matrix was not invertible, smc's returned as 1s")} else {smc <- 1 -1/diag(R.inv)} R.inv <- Pinv(R) smc <- 1 - 1/diag(R.inv) names(smc) <- colnames(R) if(!is.null(tempR)) {# R.na.inv <- try(solve(tempR),TRUE) if(prod(dim(tempR)) > 0) {R.na.inv <- Pinv(tempR) smc.na <- smc.na <- 1 -1/diag(R.na.inv)} else {smc.na <- 1} # if(inherits(R.na.inv, as.character("try-error"))) {smc.na <- rep(1,p) # message("Oh bother, in smc, the correlation matrix of the adjusted part was not invertible, smc's returned as 1s")} else {smc.na <- 1 -1/diag(R.na.inv)} } else {smc.na <- smc} if(all(is.na(smc))) {message ("Something is seriously wrong the correlation matrix.\nIn smc, smcs were set to 1.0") smc[is.na(smc)] <- 1} if(max(smc,na.rm=TRUE) > 1.0) {message("In smc, smcs > 1 were set to 1.0") smc[smc >1 ] <- 1.0} if(min(smc,na.rm=TRUE) < 0.0) {message("In smc, smcs < 0 were set to .0") smc[smc < 0] <- 0} smc.all[names(smc.all) %in% names(smc)] <- smc if(!is.null(wcc)) {smc.all[wcl] <- smc.na[names(smc.all[wcl])] } smc <- smc.all if(!is.null(maxr)) { if(any(is.na(smc))) {warning("The SMCs with NA values were replaced by their maximum correlation.") cat('\nSMC(s) for variables ',names(smc)[is.na(smc)], 'were replaced with their maximum correlation \n')} smc[is.na(smc) ] <- maxr[is.na(smc)] #in case we could not fix everything } if(covar) {smc <- smc * vari} return(smc) }psych/R/score.alpha.r0000644000176200001440000000257512260123544014173 0ustar liggesusersscore.alpha <- function (keys,items,labels=NULL,totals=TRUE, digits=2) { .Deprecated("score.alpha", msg = "score.alpha is deprecated. Please use the scoreItems function") keys <- as.matrix(keys) #just in case they were not matrices to start with items <- as.matrix(items) scores <- items %*% keys #this actually does all the work if (length(labels)>0) {colnames(scores) <- labels} #add labels abskeys <- abs(keys) item.var <- diag(var(items,use="pairwise")) #find the item variances cov.scales <- cov(scores,use="pairwise") #and total scale variance var.scales <- diag(cov.scales) cor.scales <- cor(scores,use="pairwise") #could do this as matrix operation, but why bother sum.item.var <- item.var %*% abskeys num.item <- diag(t(abskeys) %*% abskeys) #how many items in each scale alpha.scale <- (var.scales - sum.item.var)*num.item/((num.item-1)*var.scales) if (length(labels)>0) {colnames(alpha.scale) <- labels} av.r <- alpha.scale/(num.item - alpha.scale*(num.item-1)) #alpha 1 = average r item.cor <- cor(items,scores,use="pairwise") if (!totals) scores <- scores/num.item #find averages results <- list(scores=scores,alpha=round(alpha.scale,digits), av.r=round(av.r,digits), n.items = num.item, cor = round(cor.scales,digits), item.cor = round(item.cor,digits)) class(results) <- "psych" return(results) } psych/R/fa.pooled.r0000644000176200001440000000652013763215465013652 0ustar liggesusersfa.pooled <- function(datasets,nfactors=1,rotate="oblimin",scores="regression", residuals=FALSE,SMC=TRUE,covar=FALSE,missing=FALSE,impute="median", min.err = .001,max.iter=50,symmetric=TRUE,warnings=TRUE,fm="minres",alpha=.1, p =.05,oblique.scores=FALSE,np.obs=NULL,use="pairwise",cor="cor",correct=.5,weight=NULL,...) { cl <- match.call() replicates <- list() replicateslist <- list() rep.rots <- list() n.iter <- length(datasets) #the first fa becomes the target for the remaining ones X <- datasets[[1]] f <- fac(X,nfactors=nfactors,rotate=rotate,scores="none",SMC = SMC,missing=missing,impute=impute,min.err=min.err,max.iter=max.iter,symmetric=symmetric,warnings=warnings,fm=fm,alpha=alpha,oblique.scores=oblique.scores,np.obs=np.obs,use=use,cor=cor,correct=correct,...=...) fl <- f$loadings nvar <- ncol(X) #now do the replicated for (iter in (1:n.iter)) { X <- datasets[[iter]] fs <- fac(X,nfactors=nfactors,rotate=rotate,scores="none",SMC = SMC,missing=missing,impute=impute,min.err=min.err,max.iter=max.iter,symmetric=symmetric,warnings=warnings,fm=fm,alpha=alpha,oblique.scores=oblique.scores,np.obs=np.obs,use=use,cor=cor,correct=correct,...=...) #call fa with the appropriate parameters if(nfactors == 1) {replicateslist[[iter]] <- list(loadings=fs$loadings)} else { t.rot <- target.rot(fs$loadings,fl) if(!is.null(fs$Phi)) { phis <- fs$Phi # should we rotate the simulated factor correlations? #we should report the target rotated phis, not the untarget rotated phis replicateslist[[iter]] <- list(loadings=t.rot$loadings,phis=phis[lower.tri(t.rot$Phi)]) #corrected 6/10/15 #replicates <- list(loadings=t.rot$loadings,phis=phis[lower.tri(phis)]) } else {replicateslist[[iter]] <- list(loadings=t.rot$loadings)} } } replicates <- matrix(unlist(replicateslist),nrow=n.iter,byrow=TRUE) means <- colMeans(replicates,na.rm=TRUE) sds <- apply(replicates,2,sd,na.rm=TRUE) if(length(means) > (nvar * nfactors) ) { means.rot <- means[(nvar*nfactors +1):length(means)] sds.rot <- sds[(nvar*nfactors +1):length(means)] ci.rot.lower <- means.rot + qnorm(p/2) * sds.rot ci.rot.upper <- means.rot + qnorm(1-p/2) * sds.rot ci.rot <- data.frame(lower=ci.rot.lower,upper=ci.rot.upper) } else { rep.rots <- NULL means.rot <- NULL sds.rot <- NULL z.rot <- NULL ci.rot <- NULL } means <- matrix(means[1:(nvar*nfactors)],ncol=nfactors) sds <- matrix(sds[1:(nvar*nfactors)],ncol=nfactors) tci <- abs(means)/sds ptci <- 1-pnorm(tci) if(!is.null(rep.rots)) { tcirot <- abs(means.rot)/sds.rot ptcirot <- 1- pnorm(tcirot)} else {tcirot <- NULL ptcirot <- NULL} ci.lower <- means + qnorm(p/2) * sds ci.upper <- means + qnorm(1-p/2) * sds ci <- data.frame(lower = ci.lower,upper=ci.upper) class(means) <- "loadings" colnames(means) <- colnames(sds) <- colnames(fl) rownames(means) <- rownames(sds) <- rownames(fl) f$cis <- list(means = means,sds = sds,ci = ci,p =2*ptci, means.rot=means.rot,sds.rot=sds.rot,ci.rot=ci.rot,p.rot = ptcirot,Call= cl,replicates=replicates,rep.rots=rep.rots) results <- f results$Call <- cl class(results) <- c("psych","fa.ci") return(results) } psych/R/sim.structural.R0000644000176200001440000004652414533706072014745 0ustar liggesusers"sim.structure" <- "sim.structural" <- function (fx=NULL,Phi=NULL,fy=NULL,f=NULL,n=0,uniq=NULL,raw=TRUE, items = FALSE, low=-2,high=2,d=NULL,cat=5,mu=0) { cl <- match.call() if(is.null(fx)) {fx <- matrix(c(rep(c(.8,.7,.6,rep(0,12)),3),.8,.7,.6),ncol=4) colnames(fx) <- paste0("F",1:4) rownames(fx) <- paste0("V",1:12) } #added default 10/21/23 if(is.null(f)) { if(is.null(fy)) {f <- fx} else { f <- superMatrix(fx,fy)} } f <- as.matrix(f) if(!is.null(Phi)) {if(length(Phi)==1) Phi <- matrix(c(1,Phi,Phi,1),2,2)} #these are parameters for simulating items nf <- ncol(f) nvar <- nrow(f) #if(is.null(d)) {d <- seq(low,high,(high-low)/(nvar/(nf-1))) # d <- rep(d,nvar)} else {if(length(d)==1) d <- rep(d,nvar)} #a <- rep(1,nvar) if(is.vector(f)) {f <- as.matrix(f) #this is the case if doing a congeneric model Phi <- 1} if(!is.null(Phi)) { model <- f %*% Phi %*% t(f) #the model correlation matrix for oblique factors } else { model <- f%*% t(f)} if(is.null(uniq)) {diag(model) <- 1 } else { diag(model) <- uniq + diag(model)} # put ones along the diagonal unless uniq is specified nvar <- dim(f)[1] if(is.null(rownames(model))) {colnames(model) <- rownames(model) <- paste("V",1:nvar,sep="")} #else {colnames(model) <- rownames(model) <- rownames(fx)} if(n>0) { mu <- rep(mu,nvar) #observed <- mvrnorm(n = n, mu, Sigma=model, tol = 1e-6, empirical = FALSE) eX <- eigen(model) theta <- matrix(rnorm(nvar * n),n) #random values observed <- t( eX$vectors %*% diag(sqrt(pmax(eX$values, 0)), nvar) %*% t(theta) + mu) #this way theta and observed not identical theta <- observed #but seems to make them identical if(items) { #the next 2 lines were dropped in 2021 because it was adding noise twice #observedp <- matrix(t(pnorm(a*t(observed)- d)),n,nvar) #this is not useful #observed[] <- rbinom(n*nvar, cat, observedp) #this puts in error again #put in the low and high values again --dropped in 2021 observed[observed high] <- high range.ob <- range(observed) observed <- (observed - range.ob[1])/(range.ob[2]- range.ob[1]) observed<- round((cat-1) * observed) + 1} colnames(observed) <- colnames(model) r <- cor(observed) } reliability <- diag(f %*% t(f)) if(n<1) {results <- list(model=model,reliability=reliability) } else { if (!raw) {results <- list( model=model,reliability=reliability,r=r,N=n )} else { results <- list( model=model,reliability=reliability,r=r,observed= observed,theta=theta, N=n) } } results$Call <- cl class(results) <- c("psych", "sim") return(results)} # Replaced 1/8/21 with a version that reports theta # "sim" <- # function (fx=NULL,Phi=NULL,fy=NULL,alpha=.8,lambda = 0,n=0,mu=NULL,raw=TRUE) { # cl <- match.call() # ##set up some default values # if(is.null(fx)) {fx <- matrix(c(rep(c(.8,.7,.6,rep(0,12)),3),.8,.7,.6),ncol=4) # if(is.null(Phi)) {Phi <- diag(1,4,4) # Phi <- alpha^abs(row(Phi) -col(Phi)) + lambda^2 # diag(Phi) <- max((alpha + lambda),1) # Phi <- cov2cor(Phi)} # if(is.null(mu)) {mu <- c(0,.5,1,2)} } # if(is.null(fy)) {f <- fx} else { # f <- superMatrix(fx,fy)} # # if(is.null(mu)) {mu <- rep(0,ncol(fx))} # # means <- fx %*% mu # # if(is.vector(f)) {f <- as.matrix(f) #this is the case if doing a congeneric model # Phi <- 1} # if(!is.null(Phi)) { # model <- f %*% Phi %*% t(f) #the model correlation matrix for oblique factors # } else { model <- f%*% t(f)} # diag(model)<- 1 # put ones along the diagonal # nvar <- dim(f)[1] # colnames(model) <- rownames(model) <- paste("V",1:nvar,sep="") # if(n>0) { # # # observed <- mvrnorm(n = n, means, Sigma=model, tol = 1e-6, empirical = FALSE) # eX <- eigen(model) # observed <- matrix(rnorm(nvar * n),n) # observed <- t( eX$vectors %*% diag(sqrt(pmax(eX$values, 0)), nvar) %*% t(observed) + rep(means,n)) # r <- cor(observed) } # reliability <- diag(f %*% t(f)) # if(n<1) {results <- list(model=model,reliability=reliability) } else { # if (!raw) {results <- list( model=model,reliability=reliability,r=r,N=n )} else { # results <- list( model=model,reliability=reliability,r=r,observed= observed,N=n) } } # results$Call <- cl # class(results) <- c("psych", "sim") # return(results)} # ##### #Added 1/7/21 to report the generating theta #corrected 1/29/21 to correctly report generating theta (with correlations) #### # "sim" <- # function (fx=NULL,Phi=NULL,fy=NULL,alpha=.8,lambda = 0,n=0,mu=NULL,raw=TRUE,theta=TRUE) { # cl <- match.call() # # ##set up some default values # if(is.null(fx)) {fx <- matrix(c(rep(c(.8,.7,.6,rep(0,12)),3),.8,.7,.6),ncol=4)} # if(is.vector(fx)) {fx <- as.matrix(fx) } #this is the case if doing a congeneric model # # if(is.vector(fy)) {fy <- as.matrix(fy)} # # nvar <- NROW(fx) # nfact <- NCOL(fx) # if(is.null(Phi)& is.null(fy)) {Phi <- diag(nfact) # Phi <- alpha^abs(row(Phi) -col(Phi)) + lambda^2 # diag(Phi) <- max((alpha + lambda),1) # Phi <- cov2cor(Phi)} #put in a simplex structure as default # # if(is.null(fy)) {f <- fx} else { # f <- superMatrix(fx,fy)} # # if(is.null(mu)) mu <- as.matrix(0:(NCOL(f)-1),drop=FALSE) # if(is.null(Phi)) Phi <- diag(NCOL(f)) # # if(is.null(mu)) {mu <- as.matrix(rep(0,NCOL(f)),drop=FALSE)} # # means <- f %*% mu # # # model <- f %*% Phi %*% t(f) #the model correlation matrix for oblique factors # # # nvar <- NROW(f) #we do this here because we have made f a supermatrix of fx and fy # colnames(model) <- rownames(model) <- paste("V",1:nvar,sep="") # # # if((n > 0)) { # nfactors <- NCOL(f) # theta <- matrix(rnorm(n * nfactors),n) #make up some random data # # eX <- eigen(model) #the model is actually the covariance matrix # t.scores <- t(eX$vectors[,1:nfactors] %*% diag(sqrt(pmax(eX$values[1:nfactors], 0))) %*% t(theta) + rep(means,n)) #the true scores for each variable # U <- diag(sqrt(1-diag(model))) # error <- t(U %*% matrix(rnorm(n * nvar),nrow=nvar)) # observed <- t.scores + error # colnames(observed) <- paste0("V",1:ncol(observed)) # r <- cor(observed) # if(!is.null(Phi)) { exPhi <- eigen(Phi) # # f.scores <- f.scores %*% Phi # # #f.scores <-t((exPhi$vectors %*% diag(sqrt(exPhi$values)) %* t(f.scores)) + mu)} # theta <- t(exPhi$vectors %*% diag(sqrt(exPhi$values)) %*% t(theta) + rep(mu,n) ) # } # } # else { # # # diag(model)<- 1 # put ones along the diagonal # reliability <- diag(f %*% t(f)) # if(n < 1) {results <- list(model=model,reliability=reliability) } else { # if (!raw) {results <- list( model=model,reliability=reliability,r=r,N=n )} else { # results <- list( model=model,reliability=reliability,r=r, observed= observed,theta=theta,Phi=Phi, N=n) } } # results$Call <- cl # class(results) <- c("psych", "sim") # return(results)} # # #complete rewrite to allow the true factor scores (theta) to be reported #rewritten March, 2021 #added threshold option April 2023 #added the items and low, cat options to make a more general simulation function 10/20/23 "sim" <- function (fx=NULL,Phi=NULL,fy=NULL,alpha=.8,lambda = 0,n=0,mu=NULL,raw=TRUE,threshold=NULL,items = FALSE, low=-2,high=2,cat=5) { cl <- match.call() ##set up some default values # 4 correlated factors growing over time # The four factors have a simplex structure if(is.null(fx)) {fx <- matrix(c(rep(c(.8,.7,.6,rep(0,12)),3),.8,.7,.6),ncol=4) if(is.null(Phi)) {Phi <- diag(NCOL(fx)) #create a simplex of the factors Phi <- alpha^abs(row(Phi) -col(Phi)) + lambda^2 diag(Phi) <- max((alpha + lambda),1) Phi <- cov2cor(Phi)} if(is.null(mu)) {mu <- seq(0,NCOL(fx)-1)} else {if(length(mu)< NCOL(fx)) mu <- sample(mu,NCOL(fx),replace=TRUE)} } #end of default if(is.null(fy)) {f <- fx} else { f <- superMatrix(fx,fy)} nvar <- NROW(f) nfactors <- NCOL(f) if(is.vector(f)) {f <- as.matrix(f) #this is the case if doing a congeneric model Phi <- 1} if(is.null(mu)) {mu <- rep(0,NCOL(f))} means <- f %*% mu if(is.null(Phi)) {Phi <-diag(NCOL(f))} model <- f %*% Phi %*% t(f) #the model correlation matrix for oblique factors U <- diag(sqrt(1-diag(model))) #the uniquensses diag(model)<- 1 # put ones along the diagonal colnames(model) <- rownames(model) <- paste("V",1:nvar,sep="") #Create factor scores and raw data if((n>0) ) { if(!is.null(threshold)) {if (length(threshold) < nvar) threshold <- sample(threshold, nvar, replace=TRUE)} theta <- matrix(rnorm(n * nfactors),ncol=nfactors) #the orthogonal factors et <- eigen(Phi) theta <- t(et$vectors %*% diag(sqrt(et$values ))%*% t(theta)) #the correlated factors observed <- theta %*% t(f) colnames(theta) <- colnames(f) error <- t(U %*% matrix(rnorm(n * nvar),nrow=nvar)) observed <- t(t(observed + error) + rep(means,n)) #observed are factors * loadings + error + means if(items) { observed[observed high] <- high range.ob <- range(observed) observed <- (observed - range.ob[1])/(range.ob[2]- range.ob[1]) observed<- round((cat-1) * observed) + 1} if(!is.null(threshold)) { i <- 1:nvar observed <- t(t(observed [,i]) > threshold[i]) + 0 } r <- cor(observed) #will approximate the model } reliability <- diag(f %*% t(f)) if(n<1) {results <- list(model=model,reliability=reliability) } else { if (!raw) {results <- list( model=model,reliability=reliability,r=r,N=n )} else { results <- list( model=model,reliability=reliability,r=r,observed= observed,theta=theta,N=n) } } results$Call <- cl class(results) <- c("psych", "sim") return(results)} ########################### "sim.simplex" <- function(nvar =12, alpha=.8,lambda=0,beta=1,mu=NULL, n=0,threshold=NULL) { cl <- match.call() R <- matrix(0,nvar,nvar) R[] <- alpha^abs(col(R)-row(R))*beta + lambda^2 diag(R) <- max((alpha * beta) + lambda,1) R <- cov2cor(R) colnames(R) <- rownames(R) <- paste("V",1:nvar,sep="") if(is.null(mu)) {mu <- rep(0,nvar)} if(n>0) { #observed.scores <- mvrnorm(n = n, mu, Sigma=R, tol = 1e-6, empirical = FALSE) observed <- matrix(rnorm(nvar *n),n) eX <- eigen(R) observed.scores <- matrix(rnorm(nvar * n),n) observed.scores <- t( eX$vectors %*% diag(sqrt(pmax(eX$values, 0)), nvar) %*% t(observed)+mu) if(!is.null(threshold)) {if (length(threshold) < nvar) threshold <- sample(threshold, nvar, replace=TRUE) i <- 1:nvar observed.scores <- t(t(observed.scores [,i]) > threshold[i]) + 0 } colnames(observed.scores)<- paste0("V",1:nvar) observed <- cor(observed.scores) results <- list(model=R,r=observed,observed=observed.scores) results$Call <- cl class(results) <- c("psych", "sim")} else {results <- R} results } #simulate major and minor factors #modified October 18, 2022 to allow specification of g and fbig for all variables #further modified December 4, 2023 to allow specificaton of Ph "sim.minor" <- function(nvar=12, nfact=3,n=0,g=NULL,fbig=NULL,fsmall = c(-.2,.2),n.small=NULL,Phi=NULL, bipolar=TRUE, threshold=NULL) { if(is.null(fbig)) {loads <- c(.8,.6) loads <- sample(loads,nvar/nfact,replace=TRUE) if(nfact == 1) {fx <- matrix(loads,ncol=1)} else {fx <- matrix(c(rep(c(loads,rep(0,nvar)),(nfact-1)),loads),ncol=nfact)} if(bipolar) fx <- 2*((sample(2,nvar,replace=TRUE) %%2)-.5) * fx } else {fx <- fbig nvar <- NROW(fx) nfact <- NCOL(fx)} if(!is.null(g)) {if (length(g) < nvar) {g <- sample(g,nvar,replace=TRUE)} fx <- cbind(g,fx) } if( !is.null(fsmall)) { if(is.null(n.small)) n.small=nvar/4 fsmall <- c(fsmall,rep(0,n.small)) fs <- matrix(sample(fsmall,nvar*floor(n.small),replace=TRUE),ncol=floor((n.small))) fload <- cbind(fx,fs) if(is.null(g)) { colnames(fload) <- c(paste("F",1:nfact,sep=""),paste("m",1:(n.small),sep=""))} else { colnames(fload) <- c("g",paste("F",1:nfact,sep=""),paste("m",1:(n.small),sep=""))} } else { fload <- fx colnames(fload) <- paste0("F",1:nfact) } rownames(fload) <- paste0("V",1:nvar) sanity.check <- h2 <- apply(fload,1,function(x) sum(x^2)) if(max(sanity.check) > 1.0) {print(cbind(fload, h2)) stop("Model is impossible. Communalities (h2) exceed 1. ")} if(!is.null(Phi)) {Phi <- superMatrix(Phi,diag(n.small))} #added 12/3/23 results <- sim(fload,n=n, Phi=Phi,threshold=threshold) results$fload <- fload class(results) <- c("psych", "sim") return(results) } #simulate various structures and summarize them "sim.omega" <- function(nvar=12,nfact=3,n=500,g=NULL,sem=FALSE,fbig=NULL,fsmall = c(-.2,.2),bipolar=TRUE,om.fact=3,flip=TRUE,option="equal",ntrials=10,threshold=NULL) { results <- matrix(NaN,nrow=ntrials,ncol=12) colnames(results) <- c("n","om.model","omega","ev.N","e.f1","omega.f1","Beta","omegaCFA","omegaSem","rms","RMSEA","coeff.v") #iterate the entire process n.trials times for (i in 1:ntrials) { #make up some data using the Tucker notion of big and small factors x <- try(sim.minor(nvar=nvar,nfact=nfact,n=n,g=g,fbig=fbig,fsmall=fsmall,bipolar=bipolar,threshold=threshold)) if(is.null(g)) {omega.model <- 0} else {gsum <- colSums(x$fload)[1] omega.model <- gsum^2/sum(x$model)} results[i,"om.model"] <- omega.model observed.cor <- cor(x$observed) ev <- eigen(observed.cor)$values f1 <- fa(observed.cor)$loadings om.fa <- sum(f1)^2/sum(observed.cor) e.f1 <- sum(f1^2)/nvar sem.model <- omegaSem(x$fload[,1:nfact],sl=TRUE,nfactors=nfact) #this is the model based upon the true values if(sem) {stop('The option to use the sem package has been replaced with calls to lavaan') #if(!requireNamespace('sem')) {stop("You must have the sem package installed to use omegaSem")} else {sem.om <- try(sem(model=sem.model,S=observed.cor, N=n))} #omega.cfa <- omegaFromSem(observed.cor,sem.om,flip=flip) # if(omega.cfa$omega >1) omega.cfa$omega <- NA #results[i,"omegaCFA"] <- omega.cfa$omega } else {omega.cfa <- NULL} results[i,"n"] <- n if(n > 0) { if (sem) {om <- try(omegaSem(x$observed,om.fact,flip=flip,plot=FALSE,option=option))} else { om <- try(omega(x$observed,om.fact,flip=flip,plot=FALSE,option=option))} ic <- suppressWarnings(ICLUST(x$observed,1,plot=FALSE))} else { if (sem) {om <- try(omegaSem(x$model,om.fact,flip=flip,plot=FALSE,option=option))} else {om <- try(omega(x$model,om.fact,flip=flip,plot=FALSE,option=option)) if(inherits(om,"try-error")) {message("Error in sem. iteration = ",i) om <- NA next}} ic <- suppressWarnings(ICLUST(x$model,1,plot=FALSE))} #results if(sem) {results[i,"omega"] <- om$omegaSem$omega_h loads <- om$omegaSem$schmid$sl } else {results[i,"omega"] <- om$omega_h loads <- om$schmid$sl } p2 <- loads[,ncol(loads)] mp2 <- mean(p2) vp2 <- var(p2) #results[i,"p2"] <- mp2 #results[i,"p2.sd"] <- sqrt(vp2) results[i,"coeff.v"] <- sqrt(vp2)/mp2 results[i,"Beta"] <- ic$beta results[i,"ev.N"] <- ev[1]/nvar results[i,"e.f1"] <- e.f1 results[i,"omega.f1"] <- om.fa if(sem) { if(!is.null(om$omegaSem$schmid$RMSEA)) {results[i,"RMSEA"] <-om$omegaSem$schmid$RMSEA[1]} else {results[i,"RMSEA"] <- NA} if(!is.null(om$omegaSem$schmid$rms))results[i,"rms"] <- om$omegaSem$schmid$rms results[i,"omegaSem"] <- om$omega.efa$omega if(results[i,"omegaSem"] > 1) {warning("Bad result from sem case = ",i) results[i,"omegaSem"] <- NA} } else { if(!is.null(om$schmid$RMSEA)) {results[i,"RMSEA"] <- om$schmid$RMSEA[1]} else {results[i,"RMSEA"] <- NA} if(!is.null(om$schmid$rms)) results[i,"rms"] <- om$schmid$rms results[i,"omegaSem"] <- NA} } if(n==0) {results <- results[,-which(colnames(results)=="RMSEA")] #drop RMSEA if there are no cases if(!sem) results <- results[,-which(colnames(results)=="omegaSem")] } else {if(!sem) results <- results[,-which(colnames(results)=="omegaSem")] } return(results) } "sim.omega.2" <- function(nvar=12,nfact=3,n=c(100,200,400,800),g=c(0,.1,.2,.3,.4,.5),sem=TRUE,fbig=c(.7,.6),fsmall=c(-.2,.2),bipolar=FALSE,om.fact=3,ntrials=10) { result <- list() k <- 1 progressBar(k,length(n)*length(g),"sim.omega.2") for (ni in 1:length(n)) { for (gi in 1:length(g)) { result[[k]] <- sim.omega(nvar=nvar,nfact=nfact,n=n[ni],g =g[gi],fbig=fbig,fsmall=fsmall,bipolar=bipolar,ntrials=ntrials,om.fact=om.fact,sem=sem) k <- k+1 } } cnames <- colnames(result[[1]]) #result <- unlist(result) #if(sem) {result <- matrix(result,ncol=10,byrow=TRUE)} else {result <- matrix(result,ncol=9,byrow=TRUE) } #colnames(result) <- cnames return(result) } "sim.general" <- function(nvar=9,nfact=3, g=.3,r=.3,n=0) { r1 <- matrix(r,nvar/nfact,nvar/nfact) R <- matrix(g,nvar,nvar) rf <- superMatrix(r1,r1) if(nfact>2) {for (f in 1:(nfact-2)){ rf <- superMatrix(r1,rf)}} R <- R + rf diag(R) <- 1 colnames(R) <- rownames(R) <- paste((paste("V",1:(nvar/nfact),sep="")),rep(1:nfact,each=(nvar/nfact)),sep="gr") if(n > 0) {#x <- mvrnorm(n = n, mu=rep(0,nvar), Sigma = R, tol = 1e-06,empirical = FALSE) eX <- eigen(R) x <- matrix(rnorm(nvar * n),n) x <- t( eX$vectors %*% diag(sqrt(pmax(eX$values, 0)), nvar) %*% t(x)) return(x)} else { return(R)} } #not public #simulate the difference between two groups sim.groups <- function(n=1000,r=.5,d=.5,nvar = 2,bg= -1) { model <- matrix(r,nvar,nvar) diag(model) <- 1 eX <- eigen(model) observed <- matrix(rnorm(nvar * n),n) observed <- t( eX$vectors %*% diag(sqrt(pmax(eX$values, 0)), nvar) %*% t(observed)) n1 <- n/2 if(bg < 1 ) { mu1 <- c(0,d)} else {mu1 <- 0 } group1 <- t(t( observed[1:n1,]) + mu1 ) if(bg < 1) {mu2 <- c(d,0)} else {mu2 <- d} group2 <- t( t(observed[(n1+1):n,]) + mu2) data <- data.frame(grp=1,vars=group1) data2 <- data.frame(grp=2,vars=group2) data <- rbind(data,data2) return(data) } "sim.general.theta" <- function(nvar=9,nfact=3, g=.3,r=.3,n=0) { #require(MASS) r1 <- matrix(r,nvar/nfact,nvar/nfact) R <- matrix(g,nvar,nvar) rf <- superMatrix(r1,r1) if(nfact>2) {for (f in 1:(nfact-2)){ rf <- superMatrix(r1,rf)}} R <- R + rf diag(R) <- 1 colnames(R) <- rownames(R) <- paste((paste("V",1:(nvar/nfact),sep="")),rep(1:nfact,each=(nvar/nfact)),sep="gr") if(n > 0) {#x <- mvrnorm(n = n, mu=rep(0,nvar), Sigma = R, tol = 1e-06,empirical = FALSE) eX <- eigen(R) x <- matrix(rnorm(nvar * n),n) x <- t( eX$vectors %*% diag(sqrt(pmax(eX$values, 0)), nvar) %*% t(x)) return(x)} else { return(R)} }psych/R/fa.random.R0000644000176200001440000000161213122520560013565 0ustar liggesusers"fa.random" <- function(data,nfactors=1,fix=TRUE,n.obs = NA,n.iter=1,rotate="oblimin",scores="regression", residuals=FALSE,SMC=TRUE,covar=FALSE,missing=FALSE,impute="median", min.err = .001,max.iter=50,symmetric=TRUE,warnings=TRUE,fm="minres",alpha=.1, p =.05,oblique.scores=FALSE,np.obs=NULL,use="pairwise",cor="cor",weight=NULL,...) { subject <- rowMeans(data,na.rm=TRUE) r <- cor(data,subject,use="pairwise") colnames(r) <-"within" data <- data - subject+ fix * rnorm(NROW(data),0,.03) f <- fa(r=data,nfactors=nfactors,n.obs=n.obs,rotate=rotate,scores=scores,residuals=residuals,SMC = SMC,covar=covar,missing=missing,impute=impute,min.err=min.err,max.iter=max.iter,symmetric=symmetric,warnings=warnings,fm=fm,alpha=alpha,oblique.scores=oblique.scores,np.obs=np.obs,use=use,cor=cor, weight=weight,...=...) #call fa with the appropriate parameters f$subject <- subject f$within.r <- r return(f) } psych/R/omega.R0000644000176200001440000002602714420776262013035 0ustar liggesusers"omegah" <- function(m,nfactors=3,fm="minres",key=NULL,flip=TRUE, digits=2,title="Omega",sl=TRUE,labels=NULL, plot=TRUE,n.obs=NA,rotate="oblimin",Phi = NULL,option="equal",covar=FALSE,two.ok=FALSE,...) { #m is a correlation matrix, or if not, the correlation matrix is found #nfactors is the number of factors to extract #key allows items to be reversed scored if desired #if Phi is not null, this implies that we have been given a factor matrix -- added May 30, 2010 if(!requireNamespace('GPArotation') && (rotate !="cluster")) {stop("I am sorry, you need to have the GPArotation package installed")} cl <- match.call() nvar <- dim(m)[2] raw.data <- NULL if(is.null(Phi)) { #the normal case is to do the factor analysis of the raw data or the correlation matrix if(!isCorrelation(m)) { #should also check for covariance matrix n.obs <- dim(m)[1] m <- as.matrix(m) raw.data <- m #added 9/1/14 if(covar) {m <- cov(m,use="pairwise") } else {m <- cor(m,use="pairwise")} } else { if(!covar) m <- cov2cor(as.matrix(m)) #make sure it is a correlation matrix not a covariance or data matrix (if we change this, we will need to change the calculation for omega later) } if(is.null(colnames(m))) { rownames(m) <- colnames(m) <- paste("V",1:nvar,sep="") } m.names <- colnames(m) if (!is.null(key)) { m <- diag(key) %*% m %*% diag(key) colnames(m) <- m.names #flip items if we choose to do so flip <- FALSE #we do this if we specify the key } else {key <- rep(1,nvar) } signkey <- strtrim(key,1) signkey[signkey=="1"] <- "" m.names <- paste(m.names,signkey,sep="") colnames(m) <- rownames(m) <- m.names if ((nvar < 6) && (fm =="mle") ) {message(paste("In omega, 3 factors are too many for ",nvar," variables using mle. Using minres instead",sep="")) fm <- "minres"} } else { m.names <- rownames(m) } #add the names if we have a factor input gf <-schmid(m,nfactors,fm,digits,rotate=rotate,n.obs=n.obs,Phi=Phi,option=option,covar=covar,two.ok=two.ok, ...) if(!is.null(Phi)) { model <- m nfactors <- dim(model)[2] m <- factor.model(model,Phi=Phi,U2=FALSE) #estimate the correlation matrix from the factor model nvar <- dim(m)[2] if(is.null(rownames(m))) {colnames(m) <- rownames(m) <- paste("V",1:nvar)} } gload <- gf$sl[,1] if (flip) { #should we think about flipping items ? key <- sign(gload) key[key==0] <- 1 # a rare and weird case where the gloading is 0 and thus needs not be flipped if (sum(key) < nvar) { #some items have negative g loadings and should be flipped m <- diag(key) %*% m %*% diag(key) #this is just flipping the correlation matrix so we can calculate alpha gf$sl[,1:(nfactors+1)] <- diag(key) %*% gf$sl[,1:(nfactors+1)] signkey <- strtrim(key,1) signkey[signkey=="1"] <- "" m.names <- paste(m.names,signkey,sep="") colnames(m) <- rownames(m) <- m.names rownames(gf$sl) <- m.names } } Vt <- sum(m) #find the total variance in the scale Vitem <- sum(diag(m)) gload <- gf$sl[,1] gsq <- (sum(gload))^2 uniq <- sum(gf$sl[,"u2"]) if((nfactors == 1) && (fm=="pc")) {gsq <- Vt - uniq warning("omega_h is not meaningful for a principal components analysis with one component")} #weird condition when using fm=pc and 1 factor om.tot <- (Vt-uniq)/Vt om.limit <- gsq/(Vt-uniq) alpha <- ((Vt-Vitem)/Vt)*(nvar/(nvar-1)) sum.smc <- sum(smc(m,covar=covar)) lambda.6 <- (Vt +sum.smc-sum(diag(m)))/Vt if (!is.null(digits)) {omega <-list(omega_h= gsq/Vt,alpha=alpha,lambda.6 = lambda.6,omega.tot =om.tot,schmid=gf ,key = key,title=title) dg <-max(digits-1,1)} else { omega <- list(omega_h= gsq/Vt,alpha=alpha,omega.tot=om.tot,schmid=gf,key=key,title=title) dg <- 1} ev <- colSums(gf$sl[,1:(nfactors+1)]^2) ECV <- ev[1]/sum(ev) omega.stats <- factor.stats(m,gf$sl[,1:(nfactors+1)],n.obs=n.obs) general.stats <- factor.stats(m,as.matrix(gf$sl[,1]),n.obs=n.obs) #just get fit for the general factor if (nfactors<2) plot <- FALSE # if(require(Rgraphviz) && plot) {omega.model <-omega.graph(omega,title=title,sl=sl,labels=labels,digits=dg) } else {omega.model <- omega.sem(omega,sl=sl)} omega.model <- omega.sem(omega,sl=sl) #find the subset omegas omg <- omgo <- omt<- rep(NA,nfactors+1) sub <- apply(gf$sl,1,function(x) which.max(abs(x[2:(nfactors+1)]))) grs <- 0 for(group in( 1:nfactors)) { groupi <- which(sub==group) if(length(groupi) > 0) { Vgr <- sum(m[groupi,groupi]) gr <- sum(gf$sl[groupi,(group+1)]) grs <- grs + gr^2 omg[group+1] <- gr^2/Vgr omgo[group+1] <- sum(gf$sl[groupi,1])^2/Vgr omt[group+1] <- (gr^2+ sum(gf$sl[groupi,1])^2)/Vgr } omgo[1] <- sum(gf$sl[,1])^2/sum(m) #omega h omg[1] <- grs/sum(m) #omega of subscales omt[1] <- om.tot om.group <- data.frame(total=omt,general=omgo,group=omg) rownames(om.group) <- colnames(gf$sl)[1:(nfactors+1)] } #moved after tge bext line (6/21/18) #we should standardize the raw.data before doing the next step #change this to call factor.scores if(!is.null(raw.data)) { #added mean imputation 4/22/23 miss <- which(is.na(raw.data),arr.ind=TRUE) item.means <- colMeans(raw.data,na.rm=TRUE) #replace missing values with means raw.data[miss]<- item.means[miss[,2]] z.data <- scale(raw.data) scores <- z.data %*% omega.stats$weights } else {scores<- NULL} # if(!is.null(raw.data)) {scores <- raw.data %*% omega.stats$weights} else {scores<- NULL} # } omega <- list(omega_h= gsq/Vt,omega.lim = om.limit,alpha=alpha,omega.tot=om.tot,G6=lambda.6,schmid=gf,key=key,stats = omega.stats,ECV=ECV,gstats = general.stats,call=cl,title=title,R = m,model=omega.model,omega.group=om.group,scores=scores,Call=cl) class(omega) <- c("psych","omega") if(plot) omega.diagram(omega,main=title,sl=sl,labels=labels,digits=dg) return(omega) } #April 4, 2011 added a check for fm=pc and nfactors == 1 to solve problem of omega_h < omega_t -- probably not a good idea. removed #January 9, 2014 added omega scores if the raw data are given "omega" <- function(m,nfactors=3,fm="minres",n.iter=1,p=.05,poly=FALSE,key=NULL,flip=TRUE, digits=2,title="Omega",sl=TRUE,labels=NULL, plot=TRUE,n.obs=NA,rotate="oblimin",Phi = NULL,option="equal",covar=FALSE,...) { cl <- match.call() if(is.data.frame(m) || is.matrix(m)) {if((isCorrelation(m)) | (isCovariance(m) && covar)) {if(is.na(n.obs) && (n.iter > 1)) stop("You must specify the number of subjects if giving a correlation matrix") # if(!require(MASS)) stop("You must have MASS installed to simulate data from a correlation matrix") } } if(!is.data.frame(m) && !is.matrix(m)) { n.obs=m$n.obs if(poly) { pol <- list(rho=m$rho,tau = m$tau,n.obs=m$n.obs) m <- m$rho } else { m <- m$R } } else { #new data if(poly) { pol <- polychoric(m) m <- pol$rho n.obs <- pol$n.obs} } om <- omegah(m=m,nfactors=nfactors,fm=fm,key=key,flip=flip, digits=digits,title=title,sl=sl,labels=labels, plot=plot,n.obs=n.obs,rotate=rotate,Phi = Phi,option=option,covar=covar,...) #call omega with the appropriate parameters if(is.na(n.obs) ) {n.obs <- om$stats$n.obs} replicates <- list() if(n.iter > 1) {for (trials in 1:n.iter) { if(dim(m)[1] == dim(m)[2]) {#create data sampled from multivariate normal with correlation nvar <- dim(m)[1] #mu <- rep(0, nvar) # m <- mvrnorm(n = n.obs, mu, Sigma = m, tol = 1e-06, empirical = FALSE) #the next 3 lines replaces mvrnorm (taken from mvrnorm, but without the checks) eX <- eigen(m) m <- matrix(rnorm(nvar * n.obs),n.obs) m <- t(eX$vectors %*% diag(sqrt(pmax(eX$values, 0)), nvar) %*% t(m)) } else {m <- m[sample(n.obs,n.obs,replace=TRUE),]} if(poly) {pol <- polychoric(m) oms <- omegah(m=pol$rho,nfactors=nfactors,fm=fm,key=key,flip=flip, digits=digits,title=title,sl=sl,labels=labels, plot=plot,n.obs=pol$n.obs,rotate=rotate,Phi = Phi,option=option,...) #call omega with the appropriate parameters } else { oms <- omegah(m=m,nfactors=nfactors,fm=fm,key=key,flip=flip, digits=digits,title=title,sl=sl,labels=labels, plot=plot,n.obs=n.obs,rotate=rotate,Phi = Phi,option=option,...) #call omega with the appropriate parameters } # oms <-omegah(m=m,nfactors=nfactors,fm=fm,key=key,flip=flip, digits=digits,title=title,sl=sl,labels=labels, plot=plot,n.obs=n.obs,rotate=rotate,Phi = Phi,option=option,...) #call fa with the appropriate parameters replicates[[trials]] <- list(omega=oms$omega_h,alpha=oms$alpha,omega.tot=oms$omega.tot,G6=oms$G6,omega.lim=oms$omega.lim) } replicates <- matrix(unlist(replicates),ncol=5,byrow=TRUE) z.replicates <- cbind(fisherz(replicates[,1:4]),replicates[,5]) #convert to z scores means <- colMeans(z.replicates,na.rm=TRUE) sds <- apply(z.replicates,2,sd,na.rm=TRUE) ci.lower <- means + qnorm(p/2) * sds ci.upper <- means + qnorm(1-p/2) * sds ci <- data.frame(lower = ci.lower,upper=ci.upper) ci <- rbind(fisherz2r(ci[1:4,]),ci[5,]) rownames(ci) <- c("omega_h","alpha","omega_tot","G6","omega_lim") colnames(replicates) <- names(means) <- names(sds) <- rownames(ci) conf <- list(means = means,sds = sds,ci = ci,Call= cl,replicates=replicates) om$Call=cl results <- list(om = om,ci=conf) } else {om$Call=cl if(poly) {om$rho <- pol$rho om$tau <- pol$tau om$n.obs <- pol$n.obs } results <- om} class(results) <- c("psych","omega") return(results) } #written April 25, 2011 #adapted May 12, 2011 to be the primary version of omega psych/R/kappa.R0000644000176200001440000001313314372526173013033 0ustar liggesusers "wkappa" <- function(x,w=NULL) { p <- dim(x)[2] if (dim(x)[1]!= p) x <- table(x[,1],x[,2]) x <- as.matrix(x) tot <- sum(x) x <- x/tot #convert to probabilities rs <- rowSums(x) cs <- colSums(x) prob <- rs %*% t(cs) po <- tr(x) pc <- tr(prob) kappa <- (po-pc)/(1-pc) if(is.null(w)) { w <- matrix(0,ncol=p,nrow=p) for (i in 1:p) { for (j in 1:p) { w[i,j] <- 1- (abs(i-j))^2/9 } }} weighted.prob <- w*prob weighted.obser <- w*x #wkappa <- 1-sum(weighted.obser)/sum(weighted.prob) wpo <- sum(weighted.obser) wpc <- sum(weighted.prob) wkappa <- (wpo-wpc)/(1-wpc) return(list(kappa=kappa,weighted.kappa = wkappa)) } "cohen.kappa" <- function(x, w=NULL,n.obs=NULL,alpha=.05,levels=NULL) { cl <- match.call() p <- dim(x)[1] len <- p bad <- FALSE if ((dim(x)[2] == p) ||(dim(x)[2] < 3)) {result <- cohen.kappa1(x, w=w,n.obs=n.obs,alpha=alpha,levels=levels) } else { nvar <- dim(x)[2] ck <- matrix(NA,nvar,nvar) if(!is.null(colnames(x)) ){colnames(ck) <- rownames(ck) <- colnames(x)} else {colnames(ck) <- rownames(ck) <- paste("R",1:nvar,sep="") } diag(ck) <- 1 result <- list(cohen.kappa=ck) k <- 2 for (i in 2:nvar ) { for (j in 1:(i-1) ) { x1 <- data.frame(x[,i],x[,j]) x1 <- na.omit(x1) ck1 <- cohen.kappa1(x1, w=w,n.obs=n.obs,alpha=alpha,levels=levels) result[[paste(colnames(ck)[j],rownames(ck)[i])]] <- ck1 if(ck1$bad) {warning("No variance detected in cells " ,i," ",j) bad <- TRUE} ck[i,j] <- result[[k]]$kappa ck[j,i] <- result[[k]]$weighted.kappa k <- k + 1 } } result[[1]] <- ck av.kappa <- mean(ck[lower.tri(ck)],na.rm=TRUE) av.wt <- mean(ck[upper.tri(ck)],na.rm=TRUE) result$av.kappa <- av.kappa result$av.wt <- av.wt } if(bad) message("At least one item had no variance. Try describe(your.data) to find the problem.") class(result) <- c("psych","kappa") return(result) } "cohen.kappa1" <- function(x, w=NULL,n.obs=NULL,alpha=.05,levels=NULL) { cl <- match.call() p <- dim(x)[1] len <- p bad <- FALSE if (dim(x)[2]!= p) { x1 <- x[,1] x2 <- x[,2] if(is.factor(x1) ) { #this gets around a problem of tabling numbers as characters (bad idea) vs. tabling characters (good idea) x1 <- as.character(x[,1]) x2 <- as.character(x[,2])} else { x1 <- x[,1] x2 <- x[,2]} if(!is.null(levels)) {labels <- levels} else { labels <- levels(as.factor(cbind(x1,x2)))} len <- length(labels) x <- matrix(0,ncol=len,nrow=len) colnames(x) <- rownames(x) <- labels x1f <- factor(x1,levels=labels) x2f <- factor(x2,levels=labels) x <- table(x1f,x2f) # #for (item in 1:p) {x[x1[item],x2[item]] <- x[x1[item],x2[item]] +1} } x <- as.matrix(x) tot <- sum(x) x <- x/tot #convert to probabilities rs <- rowSums(x) cs <- colSums(x) prob <- rs %*% t(cs) po <- tr(x) pc <- tr(prob) if(prod(dim(x))==1) {message("Your data seem to have no variance and in complete agreement across raters. Check your data.") kappa <- NA} else { kappa <- (po-pc)/(1-pc)} #(model - data)/(1-model) if(is.null(w)) { w <- matrix(0,ncol=len,nrow=len) w[] <- abs((col(w) - row(w)))^2 #squared weights w <- 1 - w/(len-1)^2} #1 - squared weights/k colnames(w) <- rownames(w) <- colnames(x) weighted.prob <- w * prob weighted.obser <- w * x wpo <- sum(weighted.obser) wpc <- sum(weighted.prob) colw <- colSums(w*cs) roww <- colSums(w*rs) #changed back following a report Ju, Yu-Jeng #roww <- rowSums(w*rs) #corrected following a report by Lisa Avery if((!is.null(n.obs)) & (tot==1)) tot <- n.obs I <- diag(1,len,len) Vark <- (1/(tot*(1-pc)^4))* (tr(x * (I * (1-pc) - (rs %+% t(cs ))*(1-po))^2 ) + (1-po)^2 * (sum(x * (cs %+% t(rs ))^2) - tr(x * (cs %+% t(rs ))^2)) -(po*pc - 2*pc +po)^2 ) #Varkw <- (1/(tot*(1-wpc)^4))* (sum(x * (w * (1-wpc)- (colw %+% t(roww ))*(1-wpo))^2 ) -(wpo*wpc - 2*wpc +wpo)^2 ) Varkw <- (1/(tot*(1-wpc)^4))* (sum(x * (w * (1-wpc)- (colw %+% t(roww ))*(1-wpo))^2 ) -(wpo*wpc - 2*wpc +wpo)^2 ) if(tr(w) > 0) {wkappa <- (wpo-wpc)/(1-wpc) } else { wkappa <- 1- wpo/wpc} if((!is.null(n.obs)) & (tot==1)) tot <- n.obs if(is.na(Vark) || (Vark < 0)) {bad <- TRUE Vark <- 0} if(is.na(Varkw) || (Varkw < 0)) {bad <- TRUE Varkw <- 0} bounds <- matrix(NA,2,3) colnames(bounds) <- c("lower","estimate","upper") rownames(bounds) <- c("unweighted kappa","weighted kappa") bounds[1,2] <- kappa bounds[2,2] <- wkappa bounds[1,1] <- kappa + qnorm(alpha/2) * sqrt(Vark) bounds[1,3] <- kappa - qnorm(alpha/2) * sqrt(Vark) bounds[2,1] <- wkappa + qnorm(alpha/2) * sqrt(Varkw) bounds[2,3] <- wkappa - qnorm(alpha/2) * sqrt(Varkw) #if(!is.na(any(abs(bounds))) & (any(abs(bounds) > 1))) {bounds[bounds > 1] <- 1 if(any(!is.na(abs(bounds))) & (any(abs(bounds) > 1))) {bounds[bounds > 1] <- 1 bounds[bounds < -1] <- -1 warning("upper or lower confidence interval exceed abs(1) and set to +/- 1. ") } result <- list(kappa=kappa,weighted.kappa = wkappa,n.obs=tot,agree=x,weight=w,var.kappa =Vark, var.weighted = Varkw,confid=bounds,plevel=alpha,bad=bad,Call=cl) class(result) <- c("psych","kappa") return(result) } "krip" <- "krippendorf" <- function(x) { x <- as.matrix(x) tot <- sum(x) n <- tot * NCOL(x) x <- x/tot #convert to probabilities just in case they are not already rs <- rowSums(x) cs <- colSums(x) p <- (rs + cs)/2 #these are the average marginals obs <- sum(x) - tr(x) #this is the observed misses exp <- p %*% t(p) exp <- sum(exp) -tr(exp) #this the expected misses pi <- 1 - obs/exp krip <- pi * (n)/(n-1) #this is unclear what this should be return(list(krippendorf=krip,scott=pi)) } psych/R/factor.minres.R0000644000176200001440000002303612456326542014513 0ustar liggesusers"factor.minres" <- function(r,nfactors=1,residuals=FALSE,rotate="varimax",n.obs = NA,scores=FALSE,SMC=TRUE,missing=FALSE,impute="median", min.err = .001,digits=2,max.iter=50,symmetric=TRUE,warnings=TRUE,fm="minres") { cl <- match.call() .Deprecated("fa",msg="factor.minres is deprecated. Please use the fa function.") ##first some functions that are internal to factor.minres #this does the ULS fitting "fit.residuals.ols" <- function(Psi,S,nf) { diag(S) <- 1- Psi eigens <- eigen(S) eigens$values[eigens$values < .Machine$double.eps] <- 100 * .Machine$double.eps if(nf >1 ) {loadings <- eigens$vectors[,1:nf] %*% diag(sqrt(eigens$values[1:nf])) } else {loadings <- eigens$vectors[,1] * sqrt(eigens$values[1] ) } model <- loadings %*% t(loadings) residual <- (S - model)^2 diag(residual) <- 0 error <- sum(residual) } "fit.residuals.min.res" <- function(Psi,S,nf) { diag(S) <-1- Psi eigens <- eigen(S) #loadings <- eigen.loadings(eigens)[,1:nf] if(nf >1 ) {loadings <- eigens$vectors[,1:nf] %*% diag(sqrt(eigens$values[1:nf])) } else {loadings <- eigens$vectors[,1] * sqrt(eigens$values[1] ) } model <- loadings %*% t(loadings) residual <- (S - model) diag(residual) <- 0 error <- det(residual) } #this code is taken (with minor modification to make ULS) from factanal #it does the iterative calls to fit.residuals "min.res" <- function(S,nf) { S.smc <- smc(S) if(sum(S.smc) == nf) {start <- rep(.5,nf)} else {start <- 1- S.smc} res <- optim(start, fit.residuals.ols, method = "L-BFGS-B", lower = .005, upper = 1, control = c(list(fnscale = 1, parscale = rep(0.01, length(start)))), nf= nf, S=S ) Lambda <- FAout(res$par, S, nf) result <- list(loadings=Lambda,res=res) } #these were also taken from factanal FAout <- function(Psi, S, q) { sc <- diag(1/sqrt(Psi)) Sstar <- sc %*% S %*% sc E <- eigen(Sstar, symmetric = TRUE) L <- E$vectors[, 1L:q, drop = FALSE] load <- L %*% diag(sqrt(pmax(E$values[1L:q] - 1, 0)), q) diag(sqrt(Psi)) %*% load } FAfn <- function(Psi, S, q) { sc <- diag(1/sqrt(Psi)) Sstar <- sc %*% S %*% sc E <- eigen(Sstar, symmetric = TRUE, only.values = TRUE) e <- E$values[-(1L:q)] e <- sum(log(e) - e) - q + nrow(S) -e } ## now start the main function if((fm !="pa") & (fm != "minres")) {message("factor method not specified correctly, minimum residual used used") fm <- "minres" } n <- dim(r)[2] if (n!=dim(r)[1]) { n.obs <- dim(r)[1] if(scores) {x.matrix <- r if(missing) { #impute values miss <- which(is.na(x.matrix),arr.ind=TRUE) if(impute=="mean") { item.means <- colMeans(x.matrix,na.rm=TRUE) #replace missing values with means x.matrix[miss]<- item.means[miss[,2]]} else { item.med <- apply(x.matrix,2,median,na.rm=TRUE) #replace missing with medians x.matrix[miss]<- item.med[miss[,2]]} }} r <- cor(r,use="pairwise") # if given a rectangular matrix, then find the correlations first } else { if(!is.matrix(r)) { r <- as.matrix(r)} sds <- sqrt(diag(r)) #convert covariance matrices to correlation matrices r <- r/(sds %o% sds) } #added June 9, 2008 if (!residuals) { result <- list(values=c(rep(0,n)),rotation=rotate,n.obs=n.obs,communality=c(rep(0,n)),loadings=matrix(rep(0,n*n),ncol=n),fit=0)} else { result <- list(values=c(rep(0,n)),rotation=rotate,n.obs=n.obs,communality=c(rep(0,n)),loadings=matrix(rep(0,n*n),ncol=n),residual=matrix(rep(0,n*n),ncol=n),fit=0)} r.mat <- r Phi <- NULL colnames(r.mat) <- rownames(r.mat) <- colnames(r) if(SMC) { if(nfactors < n/2) {diag(r.mat) <- smc(r) } else {if (warnings) message("too many factors requested for this number of variables to use SMC, 1s used instead")} } orig <- diag(r) comm <- sum(diag(r.mat)) err <- comm i <- 1 comm.list <- list() if(fm=="pa") { while(err > min.err) #iteratively replace the diagonal with our revised communality estimate { eigens <- eigen(r.mat,symmetric=symmetric) #loadings <- eigen.loadings(eigens)[,1:nfactors] if(nfactors >1 ) {loadings <- eigens$vectors[,1:nfactors] %*% diag(sqrt(eigens$values[1:nfactors])) } else {loadings <- eigens$vectors[,1] * sqrt(eigens$values[1] ) } model <- loadings %*% t(loadings) new <- diag(model) comm1 <- sum(new) diag(r.mat) <- new err <- abs(comm-comm1) if(is.na(err)) {warning("imaginary eigen value condition encountered in fa,\n Try again with SMC=FALSE \n exiting fa") break} comm <- comm1 comm.list[[i]] <- comm1 i <- i + 1 if(i > max.iter) {if(warnings) {message("maximum iteration exceeded")} err <-0 } } } if(fm == "minres") { #added April 12, 2009 to do ULS fits uls <- min.res(r,nfactors) eigens <- eigen(r) #used for the summary stats result$par <- uls$res loadings <- uls$loadings } # a weird condition that happens with the Eysenck data #making the matrix symmetric solves this problem if(!is.double(loadings)) {warning('the matrix has produced imaginary results -- proceed with caution') loadings <- matrix(as.double(loadings),ncol=nfactors) } #make each vector signed so that the maximum loading is positive - probably should do after rotation #Alternatively, flip to make the colSums of loading positive if (FALSE) { if (nfactors >1) { maxabs <- apply(apply(loadings,2,abs),2,which.max) sign.max <- vector(mode="numeric",length=nfactors) for (i in 1: nfactors) {sign.max[i] <- sign(loadings[maxabs[i],i])} loadings <- loadings %*% diag(sign.max) } else { mini <- min(loadings) maxi <- max(loadings) if (abs(mini) > maxi) {loadings <- -loadings } loadings <- as.matrix(loadings) if(fm == "minres") {colnames(loadings) <- "MR1"} else {colnames(loadings) <- "PA1"} } #sign of largest loading is positive } #added January 5, 2009 to flip based upon colSums of loadings if (nfactors >1) {sign.tot <- vector(mode="numeric",length=nfactors) sign.tot <- sign(colSums(loadings)) loadings <- loadings %*% diag(sign.tot) } else { if (sum(loadings) <0) {loadings <- -as.matrix(loadings)} else {loadings <- as.matrix(loadings)} colnames(loadings) <- "MR1" } #end addition if(fm == "minres") {colnames(loadings) <- paste("MR",1:nfactors,sep='') } else {colnames(loadings) <- paste("PA",1:nfactors,sep='')} rownames(loadings) <- rownames(r) loadings[loadings==0.0] <- 10^-15 #added to stop a problem with varimax if loadings are exactly 0 model <- loadings %*% t(loadings) f.loadings <- loadings #used to pass them to factor.stats if(rotate != "none") {if (nfactors > 1) { if (rotate=="varimax" | rotate=="quartimax") { rotated <- do.call(rotate,list(loadings)) loadings <- rotated$loadings Phi <- NULL} else { if ((rotate=="promax")|(rotate=="Promax")) {pro <- Promax(loadings) loadings <- pro$loadings Phi <- pro$Phi} else { if (rotate == "cluster") {loadings <- varimax(loadings)$loadings pro <- target.rot(loadings) loadings <- pro$loadings Phi <- pro$Phi} else { if (rotate =="oblimin"| rotate=="quartimin" | rotate== "simplimax") { if (!requireNamespace('GPArotation')) {warning("I am sorry, to do these rotations requires the GPArotation package to be installed") Phi <- NULL} else { ob <- do.call(rotate,list(loadings) ) loadings <- ob$loadings Phi <- ob$Phi} } }}} }} #just in case the rotation changes the order of the factors, sort them #added October 30, 2008 if(nfactors >1) { ev.rotated <- diag(t(loadings) %*% loadings) ev.order <- order(ev.rotated,decreasing=TRUE) loadings <- loadings[,ev.order]} rownames(loadings) <- colnames(r) if(!is.null(Phi)) {Phi <- Phi[ev.order,ev.order] } #January 20, 2009 but, then, we also need to change the order of the rotation matrix! class(loadings) <- "loadings" if(nfactors < 1) nfactors <- n result <- factor.stats(r,loadings,Phi,n.obs) #do stats as a subroutine common to several functions result$communality <- round(diag(model),digits) result$uniquenesses <- round(diag(r-model),digits) result$values <- round(eigens$values,digits) result$loadings <- loadings if(!is.null(Phi)) {result$Phi <- Phi} if(fm == "pa") result$communality.iterations <- round(unlist(comm.list),digits) if(scores) {result$scores <- factor.scores(x.matrix,loadings) } result$factors <- nfactors result$fn <- "factor.minres" result$fm <- fm result$Call <- cl class(result) <- c("psych", "fa") return(result) } #modified October 30, 2008 to sort the rotated loadings matrix by the eigen values. psych/R/plot.irt.R0000644000176200001440000004243213634741573013521 0ustar liggesusers"plot.irt" <- function(x,xlab,ylab,main,D,type=c("ICC","IIC","test"),cut=.3,labels=NULL,keys=NULL,xlim,ylim,y2lab,lncol="black",...) { if(inherits(x,"irt.poly")) { if(missing(type)) type = "IIC" plot.poly(x=x,D=D,xlab=xlab,ylab=ylab,xlim=xlim,ylim=ylim,main=main,type=type,cut=cut,labels=labels,keys=keys,y2lab=y2lab,lncol=lncol,...)} else { item <- x temp <- list() sumtemp <- list() byKeys <- FALSE if((is.data.frame(x)) | (is.matrix(x))) {nf <- dim(x)[2] -1} else { nf <- length(x$irt$difficulty)} #if there was more than 1 factor, repeat the figure nf times #or, if there is one factor but multiple keys if(!is.null(keys)) { nkeys = ncol(keys) if (nf < nkeys) {byKeys <- TRUE nf <- nkeys} } for(f in 1:nf) {if((is.data.frame(item)) | (is.matrix(item))) {if(byKeys) {discrimination <- item[,1]} else {discrimination <- item[,f]} if(byKeys) {location <- item[,2]} else {location <- item[,f+1]} } else { if(byKeys) {discrimination=item$irt$discrimination[,1]} else {discrimination=item$irt$discrimination[,f]} if(!is.null(keys)) discrimination <- discrimination *abs( keys[,f]) if(byKeys) {location=item$irt$difficulty[[1]]} else {location=item$irt$difficulty[[f]] }} x <- NULL nvar <- length(discrimination) if(is.null(labels)) {if(!is.null(rownames(item$irt$discrimination))) {labels = rownames(item$irt$discrimination)} else {labels <- 1:nvar}} if(missing(type)) {type = "IIC"} if(missing(D)) {D <- 1.702 if(missing(xlab)) xlab <- "Latent Trait (normal scale)" x <- seq(-3,3,.1) summaryx <- seq(-3,3,1)} if(D==1) {if(missing(xlab)) xlab <- "Latent Trait (logistic scale)"} if(missing(xlab)) xlab <- "Latent Trait" if(is.null(x)) {x <- seq(-4,4,.1) summaryx <- seq(-3,3,1)} lenx <- length(x) sumInfo <- matrix(NA,ncol=nvar,nrow=length(summaryx)) summaryx <- as.matrix(summaryx,ncol=1) if(type=="ICC") { summtInfo <- NULL if(nf > 1) { if(missing(main)) {main1 <- paste("Item parameters from factor analysis for factor" ,f)} else {if (length(main) > 1) {main1 <- main[f]} else {main1 <- paste( main,' for factor ', f)}} } else {if(missing(main)) {main1 <- paste("Item parameters from factor analysis")} else {if (length(main) > 1) {main1 <- main[f]} else {main1 <- main}}} if(missing(ylab)) ylab <- "Probability of Response" if(length(lncol) < 2) lncol <- rep(lncol,nvar) ii <- 1 while((abs(discrimination[ii]) < cut) && (ii < nvar)) {ii <- ii + 1} plot(x,logistic(x,a=discrimination[ii]*D,d=location[ii]),ylim=c(0,1),ylab=ylab,xlab=xlab,type="l",main=main1,col=lncol[1],...) text(location[ii],.53,labels[ii]) for(i in (ii+1):nvar) { if(abs(discrimination[i]) > cut) { lines(x,logistic(x,a=discrimination[i]*D,d=location[i]),lty=c(1:6)[(i %% 6) + 1 ],col=lncol[i],...) text(location[i],.53,labels[i])} } } else { #not ICC tInfo <- matrix(0,ncol=nvar,nrow=length(x)) for(i in 1:nvar) { if(abs(discrimination[i]) > cut) { tInfo[,i] <- logisticInfo(x,a=discrimination[i]*D,d=location[i]) sumInfo[,i] <- logisticInfo(summaryx,a=discrimination[i]*D,d=location[i]) } else {tInfo[,i] <- 0 sumInfo[,i] <- 0} } AUC <- colSums(tInfo) max.info <- apply(tInfo,2,which.max) if(type=="test") { if(nf > 1) { if(missing(main)) {main1 <- paste("Test information -- item parameters from factor" ,f)} else {if (length(main) > 1) {main1 <- main[f]} else {main1 <- paste( main,' for factor ', f)}} } else {if(missing(main)) {main1 <- paste("Test information -- item parameters from factor analysis")} else {if (length(main) > 1) {main1 <- main[f]} else {main1 <- main}}} # if(missing(main)) main <- "Test information -- item parameters from factor analysis" if(missing(y2lab)) y2lab <- "Reliability" testInfo <- rowSums(tInfo) if(missing(ylab)) ylab <- "Test Information" if(missing(xlab)) xlab <- "Latent Trait (normal scale)" if(length(lncol)< 2) lncol <- rep(lncol,nvar) op <- par(mar=c(5,4,4,4)) #set the margins a bit wider if(missing(ylim) ) ylim <- c(0,max(testInfo)) plot(x,testInfo,typ="l",ylim=ylim,ylab=ylab,xlab=xlab,main=main1,col=lncol[1],...) ax4 <- seq(0,max(testInfo),max(testInfo)/4) rel4 <- round(1-1/ax4,2) rel4[1] <- NA axis(4,at=ax4,rel4) mtext(y2lab,side=4,line=2) op <- par(op) #set them back to what we had before } else { if(missing(ylab)) ylab <- "Item Information" #if(missing(main)) main <- "Item information from factor analysis" if(nf > 1) { if(missing(main)) {main1 <- paste("Item information from factor analysis for factor" ,f)} else {if (length(main) > 1) {main1 <- main[f]} else {main1 <- paste( main,' for factor ', f)}} } else {if(missing(main)) {main1 <- paste("Item information from factor analysis")} else {if (length(main) > 1) {main1 <- main[f]} else {main1 <- main}}} if(length(lncol) <2) lncol <- rep(lncol,nvar) ii <- 1 while((abs(discrimination[ii]) < cut) && (ii < nvar)) {ii <- ii + 1} if(missing(ylim)) {ylimit=c(0,max(tInfo)+.03)} else {ylimit <- ylim} plot(x,logisticInfo(x,a=discrimination[ii]*D,d=location[ii]),ylim=ylimit,ylab=ylab,xlab=xlab,type="l",main=main1,col=lncol[1],...) text(location[ii],max(tInfo[,ii])+.03,labels[ii]) for(i in (ii+1):nvar) { if(abs(discrimination[i]) > cut) { lines(x,logisticInfo(x,a=discrimination[i]*D,d=location[i]),lty=c(1:6)[(i %% 6) + 1 ],col=lncol[i]) text(location[i],max(tInfo[,i])+.02,labels[i]) }}} if (type !="ICC") {temp[[f]] <- list(AUC=AUC,max.info=max.info) sumInfo <- t(sumInfo) colnames(sumInfo) <- summaryx rownames(sumInfo) <- rownames(item$rho) sumtemp[[f]] <- sumInfo} } devAskNewPage(ask = TRUE)} #end of f loop devAskNewPage(ask = FALSE) if(type!="ICC") { AUC <- matrix(NA,ncol=nf,nrow=nvar) max.info <- matrix(NA,ncol=nf,nrow=nvar) for(f in 1:nf) { AUC[,f] <- temp[[f]]$AUC max.info[,f] <- temp[[f]]$max.info} AUC <- AUC/lenx #quasi normalize it max.info <- (max.info - lenx/2)*6/(lenx-1) max.info[max.info < -2.9] <- NA if(byKeys) {colnames(AUC) <- colnames(max.info) <- colnames(keys)} else {colnames(AUC) <- colnames(max.info) <- colnames(item$irt$discrimination)} rownames(AUC) <- rownames(max.info) <- rownames(item$rho) result <- list(AUC=AUC,max.info=max.info,sumInfo =sumtemp) invisible(result) class(result) <- c("psych","polyinfo") invisible(result)} } } "logisticInfo" <- function(x,d=0, a=1,c=0,z=1) {c + (z-c)*exp(a*(d-x))*a^2/(1+exp(a*(d-x)))^2} "plot.poly" <- function(x,D,xlab,ylab,xlim,ylim,main,type=c("ICC","IIC","test"),cut=.3,labels=NULL,keys=NULL,y2lab,lncol="black",...) { if(missing(ylim)) {dynamic.ylim <- TRUE} else {dynamic.ylim <- FALSE} item <- x byKeys <- FALSE if((is.data.frame(x)) | (is.matrix(x))) {nf <- dim(x)[2] -1} else { nf <- length(x$irt$difficulty)} #if there was more than 1 factor, repeat the figure nf times #or, if there is one factor but multiple keys if(!is.null(keys)) { nkeys = ncol(keys) if (nf < nkeys) {byKeys <- TRUE nf <- nkeys} } temp <- list() sumtemp <- list() x <- NULL nvar <- length(item$irt$discrimination[,1]) ncat <- dim(item$irt$difficulty[[1]])[2] if(length(lncol) < 2) lncol <- rep(lncol,nvar) if(missing(type)) {type = "IIC"} if(missing(D)) {D <- 1.702 if(missing(xlab)) xlab <- "Latent Trait (normal scale)" if(missing(xlim)) {x <- seq(-3,3,.1) } else {x <- seq(xlim[1],xlim[2],.1)} #used for item summary table summaryx <- seq(-3,3,1) } if(D==1) {if(missing(xlab)) xlab <- "Latent Trait (logistic scale)"} if(missing(xlab)) xlab <- "Latent Trait" if(is.null(x)) {x <- seq(-4,4,.1) summaryx <- seq(-3,3,1)} #used for item summary table if(is.null(labels)) {if(!is.null(rownames(item$irt$discrimination))) {labels = rownames(item$irt$discrimination)} else {labels <- 1:nvar}} lenx <- length(x) sumInfo <- matrix(NA,ncol=nvar,nrow=length(summaryx)) #if there was more than 1 factor, repeat the figure nf times for(f in 1:nf) {if(byKeys) {discrimination <- item$irt$discrimination[,1]} else {discrimination <-item$irt$discrimination[,f]} if(!is.null(keys)) discrimination <- discrimination * abs(keys[,f]) if(any(is.nan(discrimination))) {bad <- which(is.nan(discrimination)) discrimination[is.nan(discrimination)] <- max(discrimination,na.rm=TRUE) warning("An discrimination with a NaN value was replaced with the maximum discrimination for factor = ",f, " and item ",labels[bad], "\nexamine the factor analysis object (fa) to identify the Heywood case") } if(byKeys) {location=item$irt$difficulty[[1]]} else { location=item$irt$difficulty[[f]]} difficulty <- location[,1:ncat] if(type=="ICC") { #this draws the item characteristic curves #summtInfo <- NULL if(nf > 1) { if(missing(main)) {main1 <- paste("Item parameters from factor analysis for factor" ,f)} else {if (length(main) > 1) {main1 <- main[f]} else {main1 <- paste( main,' for factor ', f)}} } else {if(missing(main)) {main1 <- paste("Item parameters from factor analysis")} else {if (length(main) > 1) {main1 <- main[f]} else {main1 <- main}}} # if(missing(main)) {main1 <- "Item parameters from factor analysis" # if(nf > 1) {main1 <- paste(main1,' for factor ',f)} else {if ( length(main) > 1) main1 <- main[f]}} else {if(nf > 1) {main1 <-paste( main,' for factor ',f)} else {main1 <- main}} if(missing(ylab)) ylab <- "Probability of Response" if(dynamic.ylim) ylim <- c(0,1) for(i in 1:nvar) { if (abs(discrimination[i]) > cut) { if(discrimination[i] > 0 ) { plot(x,logistic(x,a=-D*discrimination[i],d=location[i,1]),ylim=ylim,ylab=ylab,xlab=xlab,type="l",main=main1,col=lncol[1],...) text(0,.70,labels[i])} else { plot(x,logistic(x,a=D*discrimination[i],d=location[i,1]),ylim=ylim,ylab=ylab,xlab=xlab,type="l",main=main1,col=lncol[1],...) text(max(0),.7,paste("-",labels[i],sep="")) } for (j in 2:(ncat)) { if(discrimination[i] > 0 ) { lines(x,(-logistic(x,a=D*discrimination[i],d=location[i,j])+logistic(x,a=D*discrimination[i],d=location[i,j-1])),lty=c(1:6)[(j %% 6) + 1 ],col=lncol[i]) } else {lines(x,(-logistic(x,a=-D*discrimination[i],d= location[i,j])+logistic(x,a=-D*discrimination[i],d=location[i,j-1])),lty=c(1:6)[(j %% 6) + 1 ],col=lncol[i])} } if(discrimination[i] > 0 ) { lines(x,(logistic(x,a=D*discrimination[i],d=location[i,ncat])),col=lncol[i]) } else {lines(x,(logistic(x,a=-D*discrimination[i],d=location[i,ncat])),col=lncol[i]) } }} } #now do the summary stuff for all cases #summaryx <- as.matrix(summaryx,ncol=1) # summtInfo <- apply(summaryx,1,logisticInfo,a=discrimination,d=sign(discrimination) * difficulty) #notice that we just need to add the logistics, not the differences # summtInfo <- array(unlist(summtInfo),dim=c(nvar,ncat,length(summaryx))) #this is now an array with items levels and summaryx #item and test information x <- as.matrix(x,ncol=1) summaryx <- as.matrix(summaryx,ncol=1) tInfo <- apply(x,1,logisticInfo,a=discrimination,d=sign(discrimination) * difficulty) tInfo <- array(unlist(tInfo),dim=c(nvar,ncat,length(x))) #this is now an array with items levels and x summtInfo <- apply(summaryx,1,logisticInfo,a=discrimination,d=sign(discrimination) * difficulty) summtInfo <- array(unlist(summtInfo),dim=c(nvar,ncat,length(summaryx))) #this is now an array with items levels and summaryx tInfo[is.nan(tInfo)] <- 0 #this gets around the problem of no values summtInfo[is.nan(summtInfo)] <- 0 #this gets around the problem of no values testInfo <- matrix(NA,ncol=nvar,nrow=length(x)) sumInfo <- matrix(NA,ncol=nvar,nrow=length(summaryx)) for (xi in 1:length(x)) { for (i in 1:nvar) { if (abs(discrimination[i]) > cut) { testInfo[[xi,i]] <- sum(tInfo[i,,xi]) } else {testInfo[[xi,i]] <- 0 }} } for (xi in 1:length(summaryx)) { for (i in 1:nvar) { if (abs(discrimination[i]) > cut) { sumInfo[[xi,i]] <- sum(summtInfo[i,,xi]) } else {sumInfo[[xi,i]] <- 0}} } if(type=="test") { if(nf > 1) { if(missing(main)) {main1 <- paste("Test information from factor analysis for factor" ,f)} else {if (length(main) > 1) {main1 <- main[f]} else {main1 <- paste( main,' for factor ', f)}} } else {if(missing(main)) {main1 <- paste("Test information from factor analysis")} else {if (length(main) > 1) {main1 <- main[f]} else {main1 <- main}}} # if(missing(main)) {main1 <- "Test information from factor analysis " # if(nf > 1) { # main1 <- paste(main1,' for factor ',f)} else { # if (length(main) > 1) {main1 <- main[f] # } else {if(nf > 1) {main1 <- paste(main, ' for factor ',f)} else {main1 <- main}} # } if(missing(ylab)) ylab <- "Test Information" if(missing(y2lab)) y2lab <- "Reliability" rsInfo <- rowSums(testInfo) if(dynamic.ylim) ylim = c(0,max(rsInfo)) op <- par(mar=c(5,4,4,4)) #set the margins a bit wider plot(x,rsInfo,typ="l",ylim=ylim,ylab=ylab,xlab=xlab,main=main1,col=lncol[1],...) ax4 <- seq(0,ylim[2],ylim[2]/4) rel4 <- round(1-1/ax4,2) rel4[1] <- NA axis(4,at=ax4,rel4) mtext(y2lab,side=4,line=2) op <- par(op) } else { if(type != "ICC") { if(missing(ylab)) ylab <- "Item Information" if(nf > 1) { if(missing(main)) {main1 <- paste("Item information from factor analysis for factor" ,f)} else {if (length(main) > 1) {main1 <- main[f]} else {main1 <- paste( main,' for factor ', f)}} } else {if(missing(main)) {main1 <- paste("Item information from factor analysis")} else {if (length(main) > 1) {main1 <- main[f]} else {main1 <- main}}} if(dynamic.ylim) ylim <- c(0,max(testInfo,na.rm=TRUE)+.03) ii <- 1 while((abs(discrimination[ii]) < cut) && (ii < nvar)) {ii <- ii + 1} plot(x,testInfo[,ii],ylim=ylim,ylab=ylab,xlab=xlab,type="l",main=main1,col=lncol[1],...) if(discrimination[ii] > 0 ) {text(x[which.max(testInfo[,ii])],max(testInfo[,ii])+.03,labels[ii])} else {text(x[which.max(testInfo[,ii])],max(testInfo[,ii])+.03,paste("-",labels[ii],sep=""))} for(i in (ii+1):nvar) { if (abs(discrimination[i]) > cut) { lines(x,testInfo[,i],lty=c(1:6)[(i %% 6) + 1 ],col=lncol[i]) if(discrimination[i] > 0 ) { text(x[which.max(testInfo[,i])],max(testInfo[,i])+.03,labels[i]) } else {text(x[which.max(testInfo[,i])],max(testInfo[,i])+.03,paste("-",labels[i],sep=""))} }} } } #if (type !="ICC") { temp[[f]] <- testInfo sumInfo <- t(sumInfo) rownames(sumInfo) <- labels colnames(sumInfo) <- summaryx sumtemp[[f]] <- sumInfo #keep the summary information for each factor } #end of 1:nf loop AUC <- matrix(NaN,ncol=nf,nrow=nvar) max.info <- matrix(NaN,ncol=nf,nrow=nvar) for(f in 1:nf) { AUC[,f] <- colSums(temp[[f]]) max.info[,f] <- apply(temp[[f]],2,which.max) } AUC <- AUC/lenx #quasi normalize it #max.info[is.nan(AUC) ] <- NA max.info <- (max.info - lenx/2)*6/(lenx-1) max.info[max.info < -2.9] <- NA if(byKeys) {colnames(AUC) <- colnames(max.info) <- colnames(keys)} else {colnames(AUC) <- colnames(max.info) <- colnames(item$irt$discrimination)} rownames(AUC) <- rownames(max.info) <- rownames(item$rho) result <- list(AUC=AUC,max.info=max.info,sumInfo=sumtemp) invisible(result) class(result) <- c("psych","polyinfo") invisible(result) } "irt.select" <- function(x,y) { if(is.null(dim(x$tau))) {typ="tet"} else {typ="poly"} rho <- x$rho[y,y] tau <- x$tau[y] n.obs <- x$n.obs result <- list(rho=rho,tau=tau,n.obs=n.obs) class(result) <- c("psych",typ) return(result) } "irt.stats.like" <-function(items,stats=NULL,keys=NULL,cut=.3) { results <- list() tau <- irt.tau(items) if(!is.null(stats)) { nf <- dim(stats$loadings)[2] diffi <- list() for (i in 1:nf) {diffi[[i]] <- tau/sqrt(1-stats$loadings[,i]^2) } discrim <- stats$loadings/sqrt(1-stats$loadings^2) } else {diffi <- tau nf <- NROW(tau)} if(!is.null(keys)) { if(is.null(dim(keys))) { nf <- 1 } else {nf <- dim(keys)[2]} diffi <- list() for (i in 1:nf) {diffi[[i]] <- tau } discrim <- keys } else {discrim <- as.matrix(rep(1,nf),ncol=1) rownames(discrim) <- colnames(tau) colnames(discrim) <-"d" } class(diffi) <- NULL class(discrim) <- NULL difficulty <- list(diffi) results <- list(difficulty=difficulty,discrimination=discrim) # results$irt <- irt #class(results) <- c("psych","irt.poly") return(results) } #allows the reading of data directly to score make.irt.stats <- function(difficulty,discrimination) { discrimination <- as.matrix(discrimination,ncol=1) rownames(discrimination) <- rownames(difficulty) colnames(discrimination) <- "d" difficulty <- list(difficulty) results <- list(difficulty=difficulty,discrimination = discrimination) return(results) } "irt.se" <- function( stats,scores=0, D=1.702) { if(missing(D)) D <- 1.702 nf <- length(stats$irt$difficulty) nscore <- length(scores) info <- matrix(NA,ncol=nf,nrow=nscore) for(i in 1:nscore) { for (f in 1:nf) { discrimination=stats$irt$discrimination[,f] * D location=stats$irt$difficulty[[f]] info[i,f] <- sum(logisticInfo(x=scores[i],d=location*sign(discrimination),a=discrimination))} } se <- sqrt(1/info) info.df <- data.frame(scores,info,se = se) return(info.df) } psych/R/splitHalf.R0000644000176200001440000001240614515621321013655 0ustar liggesusers#November 30, 2013 #parts adapted from combn #modified 6/20/21 to just do the unique splits if < 15,000 *i.e. a 16 variable problem #modified 9/7/21 to handle keys as lists of variable names with negative signs "splitHalf"<- function(r,raw=FALSE,brute=FALSE,n.sample=15000,covar=FALSE,check.keys=TRUE,key=NULL,ci=.05,use="pairwise") { cl <- match.call() split <- function(o,n) { A <- B <- rep(0,n) A [o] <- B[-o] <- 1 A[-o] <- B[o] <- 0 AB <- cbind(A,B) R <- t(AB) %*% r %*% AB Rab <- R[1,2]/sqrt(R[1,1]*R[2,2]) #rab <- 2*Rab/(1+Rab) rab <- 4*R[1,2]/sum(R) result <- list(rab=rab,AB=AB)} #if(!is.null(key)) {r <- r[,selectFromKeys(key)]} #made this acgain 12/23/23 v.names <- colnames(r) keys <- key maxrb <- -9999 minrb <- 2 n <- ncol(r) n2 <- trunc(n/2) n.obs <- nrow(r) #if(n.obs > n) { r <- cov(r,use=use)} # fails if n.obs < n if(!isCovariance(r)) { r <- cov(r,use=use)} if(!covar) r <- cov2cor(r) if(check.keys && is.null(keys)) { p1 <- principal(r,covar=covar) if(any(p1$loadings < 0)) warning("Some items were negatively correlated with total scale and were automatically reversed.") keys <- 1- 2* (p1$loadings < 0 ) } #keys is now a vector of 1s and -1s if(is.null(keys)) {keys <- rep(1,n)} else { #in case keys is a list, we need to tranform to a keys vector if(is.character(keys)) {temp <- rep(1,n) temp[which(strtrim(keys,1)=="-")] <- -1 keys <- temp } keys<- as.vector(keys) if(length(keys) < n) {temp <- keys #this is the option of keying just the reversals keys <- rep(1,n) names(keys) <- colnames(r) keys[temp] <- -1} } key.d <- diag(keys) r <- key.d %*% r %*% key.d signkey <- strtrim(keys,1) signkey[signkey=="1"] <- "" colnames(r) <- paste(signkey,colnames(r),sep="") e <- 0 m <- n2 h <- m sumr <- sum(r) off <- r diag(off) <- 0 sum.off <- sum(off) sumsq.off <- sum(off^2) sum.off <- sumr - tr(r) alpha <- (sum.off/sumr) * n/(n-1) tsmc <- sum(smc(r,covar=covar)) lambda6 <- (sum.off + tsmc)/sumr lambda2 <- (sum.off+ sqrt(sumsq.off*n/(n-1)))/sumr result <- NULL med.r <- median(r[lower.tri(r)],na.rm=TRUE) #find the median correlation av.r <- mean(r[lower.tri(r)],na.rm=TRUE) sumr <- 0 x <- seq_len(n) a <- seq_len(m) #count <- as.integer(round(choose(n, m)))/2 #doesn't work for very large values of n. #count <- round(choose(n, m))/2 count <- round(choose(n, m)) #do all splits because there is a problem with odd values 10/21/23 if(brute || ((count <= n.sample))) { #brute force -- try all combinations brute <- TRUE if(raw) result <- rep(NA,count) #keep the results if raw #first do the original order o <- a sp <- split(o,n) if(raw) result[1] <- abs(sp$rab) sp$rab <- abs(sp$rab) maxrb <- sp$rab maxAB <- sp$AB minrb <- sp$rab minAB <- sp$AB sumr <- sp$rab #added to get the first one 10/6/18 in response to bug report by Wes Bonifay i <- 2 #now, do the rest while (i < (count+1)) { #adapted (taken) from combn if (e < n - h) { h <- 1L e <- a[m] j <- 1L } else { e <- a[m - h] h <- h + 1L j <- 1L:h } a[m - h + j] <- e + j o <- x[a] sp <- split(o,n) if(raw) result[i] <- abs(sp$rab ) sp$rab <- abs(sp$rab) #conrolling for a rare case sumr <- sumr+ sp$rab # summing all of the split halfs to eventually report mean splithalf if(sp$rab > maxrb) {maxrb <- sp$rab maxAB <- sp$AB} if(sp$rab < minrb) {minrb <- sp$rab minAB <- sp$AB} i <- i + 1L }} else { #sample the alternatives result <- rep(NA,n.sample) sumr <- 0 for (i in 1:n.sample) { #result <- mclapply(1:n.sample,{ #use mclapply to allow for parallelism o <- sample(n,n2) sp <- split(o,n) if(raw) result[i] <- sp$rab #if(raw) result <- sp$rab #if mclapply sumr <- sumr+ sp$rab if(sp$rab > maxrb) {maxrb <- sp$rab maxAB <- sp$AB} if(sp$rab < minrb) { minrb <- sp$rab minAB <- sp$AB} } # ) #if using mclapply } #now if(brute) {meanr <- sumr/count } else {meanr <- sumr/n.sample } kd <- diag(key.d) #reverse them so we can use the keys maxAB =maxAB * kd minAB = minAB * kd rownames(maxAB) <- rownames(minAB) <- v.names maxAB <- keys2list(maxAB) minAB <- keys2list(minAB) if(!anyNA(result)) { ci <- quantile(result,c(ci/2,.5, 1 - ci/2))} else {ci <- rep(NA,3) } if(raw) { results <- list(maxrb=maxrb,minrb=minrb,maxAB=maxAB,minAB=minAB,meanr=meanr,av.r=av.r,med.r=med.r, alpha=alpha,lambda2 = lambda2, lambda6=lambda6,raw = result,ci=ci,covar=covar,Call = cl) } else {results <- list(maxrb=maxrb,minrb=minrb,maxAB=maxAB,minAB=minAB,meanr=meanr,av.r=av.r,med.r=med.r,alpha=alpha,lambda2 = lambda2,lambda6=lambda6,ci=ci,covar=covar, Call=cl)} class(results) <- c("psych","split") return(results) } psych/R/factor.congruence.R0000744000176200001440000001325114152436546015346 0ustar liggesusers#modified June 25, 2018 to handle omegaSem output as well #modified October 9, 2015 to add the NA option #January 27, 2014 added fa.congruence to clean up calls #modified March 12 to allow for a list of factor solutions #Modified December 11, 2019 to use inherits rather than class "factor.congruence" <- function (x,y=NULL,digits=2,use=NULL,structure=FALSE) { fa.congruence(x=x,y=y,digits=digits,use=use,structure=structure) } "fa.congruence" <- function (x,y=NULL,digits=2,use=NULL,structure=FALSE) { direct <- extend <- esem <- factanal <- other <- NA obnames <- cs(fa, omega, omegaSem, directSl, direct, omegaDirect, principal, iclust,extend,esem, factanal) if(is.null(y) && is.list(x)) { n <- length(x) for (i in 1:n) { xi <- x[[i]] if(length(class(xi)) > 1) { cln <- inherits(xi, obnames, which=TRUE) if (any(cln > 1)) {cln <- obnames[which(cln >0)]} else {cln <- "other"}} else {cln <- "other"} switch(cln, fa = {if(structure) {xi <- xi$Structure} else {xi <- xi$loadings}}, omega = {xi <- xi$schmid$sl xi <- as.matrix(xi[,1:(ncol(xi)-2)])}, omegaSem = {xi <- xi$omega.efa$cfa.loads}, directSl = {xi <- xi$direct}, direct = {xi <- xi$direct}, omegaDirect = {xi <- xi$loadings}, principal = {xi <- xi$loadings}, iclust = {xi <- xi$loadings}, extend = {xi <- xi$loadings}, esem = {xi <- xi$loadings}, other = {if(inherits(xi, "factanal")) {xi <- xi$loadings} else {xi <- as.matrix(xi)}} ) if(i==1) {xg <- xi} else {xg <- cbind(xg,xi)} } x <- xg if(is.null(y)) y <- xg } else { if(length(class(x)) > 1) {#cln <- class(x)[2]} else {cln <- "other"} cln <- inherits(x, obnames, which=TRUE) if (any(cln > 1)) {cln <- obnames[which(cln >0)]} else {cln <- "other"}} else {cln <- "other"} #fixe March 3, 2020 switch(cln, fa = {if(structure) {x <- x$Structure} else {x <- x$loadings}}, omega = {x <- x$schmid$sl x <- as.matrix(x[,1:(ncol(x)-2)])}, omegaSem = {x <- x$omega.efa$cfa.loads}, directSl = {x <- x$direct}, direct = {x <- x$direct}, omegaDirect = {x <- x$loadings}, principal = {x <- x$loadings}, iclust = {x <- x$loadings}, extend = {x <- x$loadings}, esem = {x <- x$loadings}, other = {if(inherits(x, "factanal")) {x <- x$loadings} else {x <- as.matrix(x)}} ) } if(length(class(y)) > 1) { #{ cln <- class(y)[2] } else {cln <- "other"} cln <- inherits(y, obnames, which=TRUE) if (any(cln > 1)) {cln <- obnames[which(cln >0)]} else {cln <- "other"} } else {cln <- "other"} switch(cln, fa = {if(structure) {y <- y$Structure} else {y <- y$loadings}}, omega = {y <- y$schmid$sl y <- as.matrix(y[,1:(ncol(y)-2)])}, omegaSem = {y <- y$omega.efa$cfa.loads}, directSl = {y <- y$direct}, direct = {y <- y$direct}, omegaDirect = {y <- y$loadings}, principal = {y <- y$loadings}, esem = {y <- y$loadings}, extend = {y <- y$loadings}, iclust = {y <- y$loadings}, other = {if(inherits(y, "factanal")) {y <- y$loadings} else {y <- as.matrix(y)}} ) if(any(is.na(x) | any(is.na(y) ))) {warning("Some loadings were missing.") if(!is.null(use)) {message("Analysis is done on complete cases") if(any(is.na(x))) { xc <- x[complete.cases(x),] y <- y[complete.cases(x),] x <- xc } if (any(is.na(y))) { yc <- y[complete.cases(y),] x <- x[complete.cases(y),] y <- yc} } else {warning("Check your data or rerun with the use = complete option")} } nx <- dim(x)[2] ny <- dim(y)[2] cross<- t(y) %*% x #inner product will have dim of ny * nx sumsx<- sqrt(1/diag(t(x)%*%x)) sumsy<- sqrt(1/diag(t(y)%*%y)) result<- matrix(rep(0,nx*ny),ncol=nx) result<- round(sumsy * (cross * rep(sumsx, each = ny)),digits) return(t(result)) } #find the generalized congruence coefficient #normalized cross products #if zero centered data, this is the correlation #if centered on the response midpoint, this is the Cohen cs #handles missing data #find the congruence coefficient #handles missing data congruence <- function(x,y=NULL){ if(is.null(y)) y <- x nvarx <- NCOL(x) nvary <- NCOL(y) if(nvarx == 1) x <- as.matrix(x) if(nvary ==1) y <-as.matrix(y) C <- matrix(nrow=nvary,ncol=nvarx) Cx <- rep(0,nvarx) Cy <- rep(0,nvary) for(i in 1:nvarx) { for(j in 1:nvary) { C[j,i] <- sum(x[,i,drop=FALSE] * y[,j,drop=FALSE],na.rm=TRUE) Cx[i] <- sum(x[,i,drop=FALSE]^2,na.rm=TRUE)}} for(j in 1:nvary){ Cy[j] <- sum(y[,j,drop=FALSE]^2,na.rm=TRUE)} if(nvarx> 1) {dcx <- sqrt(diag(1/Cx))} else {dcx <- sqrt(1/Cx)} if(nvary > 1) {dcy <- sqrt(diag(1/Cy))} else {dcy <- sqrt(1/Cy)} C <- dcy %*% C %*% dcx rownames(C) <- colnames(y) colnames(C) <- colnames(x) class(C) <- c("psych","congruence") return(C) } cohen.profile <- function(x,y=NULL,M=NULL) { if(is.null(y)) y <- x if(is.null(M)) { min.scale<- min(x,y,na.rm=TRUE) max.scale <- max(x,y,na.rm=TRUE) M <- (max.scale + min.scale )/2} congruence(x=x-M,y=y-M)} distance <- function(x,y=NULL,r=2) { if(is.null(y)) y <- x nvarx <- NCOL(x) nvary <- NCOL(y) C <- matrix(nrow=nvary,ncol=nvarx) Cx <- rep(0,nvarx) Cy <- rep(0,nvary) for(i in 1:nvarx) { for(j in 1:nvary) { if(r==1) {C[j,i] <- sum(abs(x[,i,drop=FALSE] - y[,j,drop=FALSE])) } else { C[j,i] <- exp(log(sum((x[,i,drop=FALSE] - y[,j,drop=FALSE])^r,na.rm=TRUE))/r)} } } rownames(C) <- colnames(y) colnames(C) <- colnames(x) C } psych/R/omega.sem.R0000644000176200001440000001022713616115352013604 0ustar liggesusers#January 22, 2008 #omega.graph without the graph #called from omega to do some work without the graphics "omega.sem" <- function(om.results,out.file=NULL,sl=TRUE,labels=NULL,nf=3){ if(is.list(om.results)) { if (sl) {factors <- as.matrix(om.results$schmid$sl) } else {factors <- as.matrix(om.results$schmid$oblique)} #first some basic setup parameters num.var <- dim(factors)[1] #how many variables? if (sl) {num.factors <- dim(factors)[2] -4 } else {num.factors <- dim(factors)[2]} #g, h2,u2,p2 # if(num.factors ==1) {warning("giving lavaan code for a 1 factor omega doesn't really make sense.")} # return(list(sem=NULL,lavaan=NULL))} gloading <- om.results$schmid$gloading } else {factors <- om.results num.var <- nrow(factors) gloading <- factors[,1] num.factors <-nf+1} if(sl) {fact <- c("g",paste("F",1:num.factors,"*",sep="")) } else {fact <- c("g",paste("F",1:num.factors,sep="")) } # e.g. "g" "F'1" "F2" "F3" vars <- paste("V",1:num.var,sep="") if (!is.null(labels)) {vars <- paste(labels)} else{vars <- rownames(factors) } lavaan <- vector("list",nf+1) if (sl) { sem <- matrix(rep(NA,6*(2*num.var + num.factors)),ncol=3) #used for sem diagram } else { sem <- matrix(rep(NA,6*(num.var + num.factors)+3),ncol=3) #used for sem diagram } #show the cluster structure with ellipses if (sl) { l <- matrix(factors[,2:(num.factors+1)],ncol=num.factors) } else { l <- factors } m1 <- matrix(apply(t(apply(l, 1, abs)), 1, which.max), ncol = 1) lavaan[[1]] <- 'g =~ ' if (sl) { k <- num.var for (i in 1:num.var) { sem[i,1] <- paste(fact[1],"->",vars[i],sep="") sem[i,2] <- vars[i] lavaan[[1]] <- paste0(lavaan[[1]], '+', vars[i]) } } else { k <- num.factors for (j in 1:num.factors) { sem[j,1] <- paste(fact[1],"->",fact[1+j],sep="") sem[j,2] <- paste("g",fact[1+j],sep="") lavaan[[1]] <- paste0(lavaan[[1]], ' + ', fact[1+j]) } } if(num.factors > 1) for(j in 1:num.factors) { lavaan[[1+j]] <- paste0('F',j ,"=~ ")} for (i in 1:num.var) { sem[i+k,1] <- paste(fact[1+m1[i]],"->",vars[i],sep="") sem[i+k,2] <- paste(fact[1+m1[i]],vars[i],sep="") if(num.factors > 1) lavaan[[1+ m1[i ]]] <- paste0(lavaan[[1 + m1[i] ]] ,' + ', vars[i]) } if(sl) { k <- num.var*2 for (i in 1:num.var) { sem[i+k,1] <- paste(vars[i],"<->",vars[i],sep="") sem[i+k,2] <- paste("e",i,sep="") } k <- k + num.var for (f in 1:num.factors) { sem[f+k,1] <- paste(fact[1+f],"<->",fact[1+f],sep="") sem[f+k,3] <- "1" } k <- k+ num.factors sem[k+1,1] <- paste("g <->g") sem[k+1,3] <- "1" k<- k+1 } else { k <- num.var + num.factors for (i in 1:num.var) { sem[i+k,1] <- paste(vars[i],"<->",vars[i],sep="") sem[i+k,2] <- paste("e",i,sep="") } k <- 2*num.var + num.factors for (f in 1:num.factors) { sem[f+k,1] <- paste(fact[f+1],"<->",fact[f+1],sep="") sem[f+k,3] <- "1" } k <- 2*num.var + 2*num.factors sem[k+1,1] <- paste("g<->g") sem[k+1,3] <- "1" k <- k+1 } colnames(sem) <- c("Path","Parameter","Initial Value") sem[,1] <- gsub("-","",sem[,1]) #get rid of the negative signs for variables sem[,1] <- gsub(">","->",sem[,1]) #but put back in the arrows lavaan <- gsub("-","",unlist(lavaan)) lavaan <- noquote(lavaan) return(list(sem=sem[1:k,],lavaan=lavaan)) #return(list(sem=sem[1:k,],) } psych/R/interp.median.R0000644000176200001440000000611110756045473014473 0ustar liggesusers"interp.median" <- function(x,w=1,na.rm=TRUE) { im <- interp.quantiles(x,q=.5,w,na.rm=na.rm) return(im)} "interp.quantiles" <- function(x,q=.5,w=1,na.rm=TRUE) { if (!(q>0) | !(q<1) ) {stop("quantiles most be greater than 0 and less than 1 q = ",q)} if(is.vector(x)) {im <- interp.q(x,q,w,na.rm=na.rm) } else { if((is.matrix(x) | is.data.frame(x)) ){ n <- dim(x)[2] im <- matrix(NA,ncol=n) for (i in 1:n) {im[i] <- interp.q(x[,i],q,w=w,na.rm=na.rm)} colnames(im) <- colnames(x) } else {stop('The data must be either a vector, a matrix, or a data.frame')} return(im) }} "interp.q" <- function(x,q=.5,w=1,na.rm=TRUE) { if(na.rm) { x <- x[!is.na(x)]} n <- length(x) n2 <- (n+1)*q o <- order(x) x <- x[o] ml <- x[floor(n2)] mh <- x[ceiling(n2)] m <- (mh+ml)/2 xb <- sum(xm) am <- n - xa - xb if(am >1) { im <- m -.5 *w + w*(n*q - xb )/am #even number at median } else {im <- m } #no ties return(im) } "interp.quart" <- function(x,w=1,na.rm=TRUE) { q <- c(.25,.5,.75) if(na.rm) { x <- x[!is.na(x)]} n <- length(x) n2 <- (n+1)*q N<- n*q o <- order(x) x <- x[o] ml <- x[floor(n2)] mh <- x[ceiling(n2)] m <- (mh+ml)/2 im<- xa <- xb <- rep(NA,3) for (i in 1:3) {xb[i] <- sum(x m[i]) } am <- n - xa - xb for (i in 1:3) {if(am[i] >1) { im[i] <- m[i] - .5*w + w*(N[i]-xb[i])/am[i]} else im[i] <- m[i]} return(im)} "interp.quartiles" <- function(x,w=1,na.rm=TRUE) { q <- c(.25,.5,.75) if(is.vector(x)) {im <- interp.quart(x,w,na.rm=na.rm) names(im) <- c("Q1","Median","Q3") } else { nvar <- dim(x)[2] im <- matrix(NA,ncol=3,nrow=nvar) for (i in 1:nvar ) { im[i,] <- interp.quart(x[,i],w,na.rm=na.rm)} rownames(im) <- colnames(x) colnames(im) <- c("Q1","Median","Q3")} return(im)} "interp.values" <- function(x,w=1,na.rm=TRUE) { n <- length(x) tabx <- table(x) cv <- as.numeric(names(tabx)) k <- 1 v <- x[order(x)] for (i in 1:length(tabx)) { for (j in 1:tabx[i]) { v[k] <- i - .5* w + j/(tabx[i]+1) k <- k+1 } } return(v) } "interp.boxplot" <- function(x,w=1,na.rm=TRUE) { stats <- interp.quartiles(x,w,na.rm=na.rm) return(stats) } "interp.qplot.by" <- function(y,x,w=1,na.rm=TRUE,xlab="group",ylab="dependent",ylim=NULL,arrow.len=.05,typ="b",add=FALSE,...) { z <- by(y,x,interp.quartiles) zname <- names(z) z <- matrix(unlist(z),ncol=3,byrow=TRUE) rownames(z) <- zname colnames(z) <- c("Q1","Median","Q3") xv <- as.numeric(zname) ymin <- min(y) ymax <- max(y) if(is.null(ylim)) {ylim <- c(ymin,ymax)} if(add) { points(xv,z[,2],typ=typ,...)} else { plot(xv,z[,2],ylim = ylim,xlab=xlab,ylab=ylab,typ=typ,...)} lenx <- length(xv) for (i in 1:lenx) { xlow <- z[i,1] xcen <- z[i,2] xup <- z[i,3] arrows(xv[i],xlow,xv[i],xup,length=arrow.len, angle = 90, code=3, lty = NULL, lwd = par("lwd"), xpd = NULL,...) } } psych/R/error.bars.by.R0000644000176200001440000005014714510014416014417 0ustar liggesusers"error.bars.by" <- function (x,group,data=NULL,by.var=FALSE,x.cat=TRUE,ylab =NULL,xlab=NULL,main=NULL, ylim= NULL, xlim=NULL, eyes=TRUE,alpha=.05,sd=FALSE,labels=NULL, v.labels=NULL,v2.labels=NULL,add.labels=NULL, pos=NULL,arrow.len=.05,min.size=1, add=FALSE,bars=FALSE, within=FALSE, colors=c("black","blue","red"),lty,lines=TRUE, legend=0, pch=16, density=-10,stats=NULL,...) # x data frame with { if(!lines) {typ <- "p"} else {typ <- "b"} n.color <- length(colors) n.pch =length(pch) #first, see if they are in formula mode added August 18, 2018 formula <- FALSE if(inherits(x, "formula")) { ps <- fparse(x) if(missing(data) | (length(data)==0)) {x <- get(ps$y) group <- ps$x } else {x <- data[ps$y] group <- data[ps$x]} formula <- TRUE # if(is.null(data)) stop("You must specify the data if you are using formula input") # x <- data[ps$y] # group <- data[ps$x] } grp2 <- n.grp2 <- 1 #just to set an initial value if(is.null(ylab)) ylab <- colnames(x) if(is.null(xlab)) xlab <- colnames(group) if(missing(by.var)) by.var=TRUE if(missing(lines)) lines <- FALSE if(NROW(group) <2 ) group <- x[,group] if(NCOL(group)==1) {n.grp1 <- length(table(group)) n.grp2 <- 1} else {n.grp1 <- (dim(table(group))[1]) n.grp2 <- dim(table(group))[2] } nvar <- NCOL(x) # if(is.null(nvar)) nvar <- 1 #added May 21, 2016 to handle the case of a single variable if(by.var & (nvar > n.color)) {colors <- rainbow(nvar)} if(!missing(density)) {col12 <- col2rgb(colors,TRUE)/255 colors <- rgb(col12[1,],col12[2,],col12[3,],.5) n.color <- length(colors) if(length(density==1)) density=rep(density,n.color)} #density = -10 if(missing(lty)) lty <- 1:8 legend.location <- c("bottomright", "bottom", "bottomleft", "left", "topleft", "top", "topright", "right", "center","none") if(is.null(stats)) { all.stats <- describe(x) } else {all.stats <- stats z <- dim(all.stats)[1] names <- rownames(stats) if(is.null(all.stats$se) ) stats$se <- all.stats$se <- stats$sd/sqrt(stats$n-1) if(is.null(all.stats$max)) all.stats$max <- max(stats$mean,na.rm=TRUE) if(is.null(all.stats$min)) all.stats$min <- min(stats$mean,na.rm=TRUE) } min.x <- min(all.stats$min,na.rm=TRUE) max.x <- max(all.stats$max,na.rm=TRUE) max.se <- max(all.stats$se,na.rm=TRUE) if(sd) max.se <- max(all.stats$sd,na.rm=TRUE) if(is.null(ylim)) {if(is.na(max.x) | is.na(max.se) | is.na(min.x) | is.infinite(max.x)| is.infinite(min.x) | is.infinite(max.se)) { ylim=c(0,1)} else { if(sd) { ylim <- c(min.x - max.se,max.x+max.se) } else { ylim=c(min.x - 2*max.se,max.x+2*max.se)}} } if(is.null(main)) {if(sd) {main <- paste("Means + Standard Deviations") } else {main <- paste((1-alpha)*100,"% confidence limits",sep="")} } ########## # bar plot (ugly but some people like them) ########## if (bars) { #draw a bar plot and add error bars -- this is ugly but some people like it if(is.null(stats)){ group.stats <- describeBy(x,group,mat=TRUE,skew=FALSE) n.var <- dim(all.stats)[1] n.group <- length(group.stats[[1]])/n.var #this is total number of groups but it may be 2 x 2 or n x m } else {n.group <- NROW(stats) #we have the stats as input, we just use them here group.stats <- list() group.stats$mean <- stats$mean group.stats$n <- stats$n group.stats$sd <- stats$sd group.stats$se <- stats$se } # group.stats <- describeBy(x,group,mat=TRUE,skew=FALSE) # n.var <- dim(all.stats)[1] # n.group <- length(group.stats[[1]])/n.var if(is.null(stats)){group.means <- matrix(group.stats$mean,ncol=n.group,byrow=TRUE)} else {group.means <- as.matrix(group.stats$mean,drop=FALSE)} if(missing(pch)) pch <- seq(15,(15+n.grp2)) if(sd) {group.se <- matrix(group.stats$sd,ncol=n.group,byrow=TRUE)} else { group.se <- matrix(group.stats$se,ncol=n.group,byrow=TRUE)} group.n <- matrix(group.stats$n,ncol=n.group,byrow=TRUE) if(within) {group.smc <- matrix(unlist(by(x,group,smc)),nrow=n.group,byrow=TRUE) group.sd <- matrix(group.stats$sd,ncol=n.group,byrow=TRUE) if(sd) {group.se <- sqrt(group.se^2 * (1-group.smc))} else { group.se <- sqrt(group.sd^2 *(1-group.smc)/group.n) }} rownames(group.means) <- rownames(all.stats) if(is.null(labels)) {colnames(group.means) <- paste("Group",1:n.group)} else {colnames(group.means) <- labels } if (is.null(ylab)) ylab <- "Dependent Variable" if(missing(ylim)) ylim=c(0,max.x+2*max.se) if(by.var) { if (is.null(xlab)) xlab <- "Variables" mp <- barplot(t(group.means),beside=TRUE,ylab=ylab,xlab=xlab,ylim=ylim,main=main,col=colors,...) for(i in 1:n.var) { for (j in 1:n.group) { xcen <- group.means[i,j] xse <- group.se[i,j] if(sd) {ci <- 1} else {ci <- qt(1-alpha/2,group.n[i,j])} if(is.finite(xse) && xse>0) arrows(mp[j,i],xcen-ci*xse,mp[j,i],xcen+ci* xse,length=arrow.len, angle = 90, code=3,col = par("fg"), lty = NULL, lwd = par("lwd"), xpd = NULL) }} } else { if (is.null(xlab)) xlab <- "Grouping Variable" mp <- barplot(group.means,beside=TRUE,ylab=ylab,xlab=xlab,ylim=ylim,main=main,col=colors,...) for(i in 1:n.var) { for (j in 1:n.group) { xcen <- group.means[i,j] xse <- group.se[i,j] xn <- group.n[i,j] if(sd) {ci <- 1} else {ci <- qt(1-alpha/2,group.n[i,j])} if(is.finite(xse) && xse>0 && xn > 0 && is.finite(ci)) arrows(mp[i,j],xcen-ci*xse,mp[i,j],xcen+ci* xse,length=arrow.len, angle = 90, code=3,col = par("fg"), lty = NULL, lwd = par("lwd"), xpd = NULL) }} } axis(2,...) box() if(legend >0 ){ if(!is.null(v.labels)) {lab <- v.labels} else {lab <- paste("V",1:n.var,sep="")} legend(legend.location[legend], legend=lab, col = colors[(1: n.color)],pch=pch[1: n.var], text.col = "green4", lty = lty[1:n.var], merge = TRUE, bg = 'gray90')} } else { ############### #the normal case is to not use bars # could use some of the code from the bars section to be tighter code ############### if(is.null(stats)){ group.stats <- describeBy(x,group) n.group <- length(group.stats) #this is total number of groups but it may be 2 x 2 or n x m } else {n.group <- NROW(stats) #we have the stats as input, we just use them here group.stats <- list() for(g in (1:n.group)) {group.stats[[g]] <- stats[g,]} } #why not use matrix output for this n.var <- ncol(x) if(is.null(n.var)) n.var <- 1 #first set up some defaults to allow the specification of colors, lty, and pch dynamically and with defaults if(missing(pch)) pch <- seq(15,(15+n.group)) if(missing(lty)) lty <- 1:8 if(within) {group.smc <- by(x,group,smc) } z <- dim(x)[2] if(is.null(z)) z <- 1 if (is.null(ylab)) ylab <- "Dependent Variable" if(!by.var) { if(is.null(v.labels)) v.labels <- names(group.stats) #use the values of the grouping variable if (is.null(xlab)) xlab <- "Independent Variable" for (g in 1:n.group) { x.stats <- group.stats[[g]] if (within) { x.smc <- group.smc[[g]] if(sd) {x.stats.$se <- sqrt(x.stats$sd^2* (1- x.smc))} else { x.stats$se <- sqrt((x.stats$sd^2* (1- x.smc))/x.stats$n)} } if (missing(xlim)) xlim <- c(.5,n.var+.5) if(!add) {plot(x.stats$mean,ylim=ylim,xlim=xlim, xlab=xlab,ylab=ylab,main=main,typ=typ,lty=(lty[((g-1) %% 8 +1)]),axes=FALSE,col = colors[(g-1) %% n.color +1], pch=pch[g],...) #axis(1,1:z,colnames(x),...) axis(1,1:z,v.labels,...) axis(2,...) box() } else {points(x.stats$mean,typ = typ,lty=lty[((g-1) %% 8 +1)],col = colors[(g-1) %% n.color +1], pch=pch[g]) } if(!is.null(labels)) {lab <- labels} else {lab <- paste("V",1:z,sep="")} if (length(pos)==0) {locate <- rep(1,z)} else {locate <- pos} if (length(labels)==0) lab <- rep("",z) else lab <-labels for (i in 1:z) {xcen <- x.stats$mean[i] if(sd) {xse <- x.stats$sd[i] } else {xse <- x.stats$se[i]} if(sd) {ci <- 1} else { if(x.stats$n[i] >1) {ci <- qt(1-alpha/2,x.stats$n[i]-1)} else {ci <- 0}} #corrected Sept 11, 2013 if(is.finite(xse) & (xse > 0)) { arrows(i,xcen-ci*xse,i,xcen+ci* xse,length=arrow.len, angle = 90, code=3,col = colors[(g-1) %% n.color +1], lty=(lty[((g-1) %% 8 +1)]), lwd = par("lwd"), xpd = NULL) if (eyes) {catseyes(i,xcen,xse,x.stats$n[i],alpha=alpha,density=density[(g-1) %% n.color +1],col=colors[(g-1) %% n.color +1] )} #text(xcen,i,labels=lab[i],pos=pos[i],cex=1,offset=arrow.len+1) #puts in labels for all points } } add <- TRUE # lty <- "dashed" } #end of g loop if(legend >0 ){ if(!is.null(labels)) {lab <- labels} else {lab <- paste("G",1:n.group,sep="")} legend(legend.location[legend], legend=lab, col = colors[(1: n.color)],pch=pch[1: n.group], text.col = "green4", lty = lty[1:8], merge = TRUE, bg = 'gray90') } # #now, if v2.labels, get the var.means, etc # n.vars <- dim(x)[2] # if(is.null(n.vars)) n.vars <- 1 #if we just have one variable to plot # var.means <- matrix(NA,nrow=n.vars,ncol=n.group) # var.n <- var.se <- ci <- matrix(0,nrow=n.vars,ncol=n.group) # for (g in 1:n.group) { # var.means[,g] <- as.numeric (group.stats[[g]]$mean) #problem with dimensionality -- if some grouping variables are empty # if(sd) {var.se[,g] <- as.numeric(group.stats[[g]]$sd)} else {var.se[,g] <- as.numeric(group.stats[[g]]$se) } # var.n [,g] <-as.numeric( group.stats[[g]]$n) # } } else { # end of not by var loop #alternatively, do it by variables rather than by groups, or if we have two grouping variables, treat them as two variables if (is.null(xlab)) xlab <- "Grouping Variable" n.vars <- dim(x)[2] if(is.null(n.vars)) n.vars <- 1 #if we just have one variable to plot var.means <- matrix(NA,nrow=n.vars,ncol=n.group) var.n <- var.se <- ci <- matrix(0,nrow=n.vars,ncol=n.group) #if there are two or more grouping variables,this strings them out and treats them as two separate variables. for (g in 1:n.group) { var.means[,g] <- as.numeric (group.stats[[g]]$mean) #problem with dimensionality -- if some grouping variables are empty if(sd) {var.se[,g] <- as.numeric(group.stats[[g]]$sd)} else {var.se[,g] <- as.numeric(group.stats[[g]]$se) } var.n [,g] <-as.numeric( group.stats[[g]]$n) } var.n[is.na(var.n)] <- 0 var.n[var.n < min.size] <- 0 if(x.cat) {x.values <- 1:n.grp1} else { x.values <- names(table(group[,1])) x.values <- as.numeric(x.values) } if(missing(xlim)) xlim <- c(.5,max(x.values) + .5) if(is.null(v.labels)) v.labels <- names(unlist(dimnames(group.stats)[1])) for (i in 1:n.vars) { if(!add) { #although index by i, this just plots the first variable using plot, the other one is plotted using points plot(x.values,var.means[i,1:n.grp1],ylim=ylim,xlim = xlim, xlab=xlab,ylab=ylab,main=main,typ = typ,axes=FALSE,lty=lty[i],pch=pch[i],col = colors[(i-1) %% n.color +1],...) #we need to index by what grp1 we are in, not by i grp2 <- 1 #just in case we only have one IV if(n.grp1 < n.group) { #do we have multiple IVs # x.values1 <- rep(x.values,(n.group/n.grp1 -1)) for(grp2 in 1:(n.grp2-1)){ points(x.values,var.means[i,(n.grp1 *grp2 +1):(n.grp1*(grp2+1))],typ=typ,lty=lty[(grp2) %% 8 +1],col = colors[grp2 %% n.color + 1],pch=pch[(grp2 %% n.pch + 1)],...) } # points(x.values1,var.means[i,(n.grp1 +1):n.group],typ = typ,lty=lty[((i-1) %% 8 +1)],col = colors[(i) %% n.color + 1], pch=pch[i],...) #the first grouping variable } add <- TRUE } else { #now, draw the points with different line types and colors for each value of the second grouping variable #we want to adjust the line type for the fact that we have two different grouping variables points(x.values,var.means[i,1:(n.grp1)],typ = typ,lty=lty[((i-1) %% 8 +1)],col = colors[(i-1) %% n.color + 1], pch=pch[i],...) if(n.grp2>1) { for(grp2 in 1:(n.grp2-1)){ points(x.values,var.means[i,(n.grp1 *grp2 +1):(n.grp1*(grp2+1))],typ=typ,lty=lty[(grp2-1) %% 8 +1],col = colors[(grp2-1) %% n.color + 1], pch=pch[((grp2-1) %% n.pch + 1)],...) # if(n.grp1 < n.group) { points(x.values1,var.means[i,(n.grp1 +1):(n.group)],typ = typ,lty=lty[((1:(n.grp1)-1) %% 8 +1)],col = colors[(1:(n.grp1)) %% n.color + 1], pch=pch[i],...) } } # points(x.values,var.means[i,],typ = typ,lty=lty,...) } } if(!is.null(labels)) {lab <- labels} else {lab <- paste("G",1:z,sep="")} if (length(pos)==0) {locate <- rep(1,z)} else {locate <- pos} if (length(labels)==0) lab <- rep("",z) else lab <-labels #for (g in 1:n.group) { xcen <- var.means[i,] xse <- var.se[i,] if(sd) {ci <- rep(1,n.group)} else { ci[(var.n > 1)] <- qt(1 - alpha/2, var.n[(var.n > 1)] - 1) } # } for (g in 1:n.grp1) { x.stats <- group.stats[[g]] if(x.cat) { #the normal case # arrows(g,xcen[g]-ci[g]*xse[g],g,xcen[g]+ci[g]* xse[g],length=arrow.len, angle = 90, code=3, col = colors[(i-1) %% n.color +1], lty = NULL, lwd = par("lwd"), xpd = NULL) if(!is.na(var.n[g]) & (var.n [g] >1)) {if (eyes) { catseyes(g,xcen[g],xse[g],group.stats[[g]]$n[i],alpha=alpha,density=density[(i-1) %% n.color +1],col=colors[(i-1) %% n.color +1] ) #why is this here? } else { if(ci[g]> 0) arrows(x.values[g],xcen[g]-ci[g]*xse[g],x.values[g],xcen[g]+ci[g]* xse[g],length=arrow.len, angle = 90, code=3,col = colors[(i-1) %% n.color +1], lty=(lty[((grp2-1) %% 8 +1)]), lwd = par("lwd"), xpd = NULL) if (eyes) {catseyes(x.values[g],xcen[g],xse[g],x.stats$n[g],alpha=alpha,density=density[(i-1) %% n.color +1],col=colors[(i-1) %% n.color +1] )}} } } else {#consider the !x.cat condition } #text(xcen,i,labels=lab[i],pos=pos[i],cex=1,offset=arrow.len+1) #puts in labels for all points } x.values <- as.numeric(x.values) if(n.grp1 < n.group) { for (g in 1: n.grp1) { for(grp2 in 1:(n.grp2-1)){if(!is.na(xse[g + n.grp1]) && (xse[g + n.grp1] > 0) && (ci[g + n.grp1]>0 )) { if(x.cat) { arrows(g,xcen[g+n.grp1*grp2]-ci[g+n.grp1*grp2]*xse[g+n.grp1*grp2],g,xcen[g+n.grp1*grp2]+ci[g+n.grp1*grp2]* xse[g+n.grp1*grp2],length=arrow.len, angle = 90, code=3, col = colors[(grp2) %% n.color +1], lty=(lty[((grp2-1) %% 8 +1)]), lwd = par("lwd"), xpd = NULL) } else {#consider the none cat condition x.values <- as.numeric(x.values) arrows(x.values[g],xcen[g+n.grp1*grp2]-ci[g+n.grp1*grp2]*xse[g+n.grp1*grp2],x.values[g],xcen[g+n.grp1*grp2]+ci[g+n.grp1*grp2]* xse[g+n.grp1*grp2],length=arrow.len, angle = 90, code=3, col = colors[(grp2) %% n.color +1], lty=(lty[((grp2-1) %% 8 +1)]), lwd = par("lwd"), xpd = NULL) } if (eyes) { catseyes(x.values[g],xcen[g+n.grp1*grp2],xse[g+n.grp1 * grp2],group.stats[[g+n.grp1*grp2]]$n[i],alpha=alpha,density=density[(i) %% n.color +1],col=colors[(i) %% n.color +1] ) } else { # if(xse[g + n.grp1]> 0) if(ci[g]>0) {arrows(x.values[g],xcen[g+n.grp1]-ci[g+n.grp1]*xse[g+n.grp1],x.values[g+n.grp1],xcen[g + n.grp1]+ci[g+n.grp1]* xse[g+n.grp1],length=arrow.len, angle = 90, code=3,col = colors[(grp2-1) %% n.color +1], lty=(lty[((grp2-1) %% 8 +1)]), lwd = par("lwd"), xpd = NULL) if (eyes) {catseyes(x.values[g],xcen[g+n.grp1],xse[g+n.grp1],x.stats$n[g+n.grp1],alpha=alpha,density=density[(i-1) %% n.color +1],col=colors[(i-1) %% n.color +1] )}} } } } #for grp2 loop } } if(x.cat) {axis(1,1:n.grp1,v.labels,...) } else {axis(1)} axis(2,...) box() } #end of i loop } #end of by var is true loop #### now add labels and lengeds if desired #now, if v2.labels, get the var.means, etc n.vars <- dim(x)[2] if(is.null(n.vars)) n.vars <- 1 #if we just have one variable to plot var.means <- matrix(NA,nrow=n.vars,ncol=n.group) var.n <- var.se <- ci <- matrix(0,nrow=n.vars,ncol=n.group) for (g in 1:n.group) { var.means[,g] <- as.numeric (group.stats[[g]]$mean) #problem with dimensionality -- if some grouping variables are empty if(sd) {var.se[,g] <- as.numeric(group.stats[[g]]$sd)} else {var.se[,g] <- as.numeric(group.stats[[g]]$se) } var.n [,g] <-as.numeric( group.stats[[g]]$n) } if(!by.var) var.means <- t(var.means) if(!is.null(add.labels)) { if(is.null(v2.labels)) v2.labels <- names(unlist(dimnames(group.stats)[2])) if(is.null(pos)) { if(x.cat) {text.values <- data.frame(x=rep(.5 + (add.labels=="right")*NCOL(var.means),NROW(var.means)),y=var.means[,(add.labels=="right")*(NCOL(var.means)-1)+1],name=v2.labels)} else { text.values <- data.frame(x=rep((.5+ (add.labels == "right") * x.values[n.grp1]),n.grp2),y=var.means[seq(1+(add.labels == "right")* (n.grp1),n.grp2 * n.grp1,(n.grp1))],name=v2.labels)} #if(x.cat){text.values <- data.frame(x=rep((.5+ (add.labels == "right")* n.grp1),n.grp2),y=var.means[seq(1+(add.labels == "right")* (n.grp1-1),n.grp2 * n.grp1,)],name=v2.labels)} else { # text.values <- data.frame(x=rep((.5+ (add.labels == "right") * x.values[n.grp1]),n.grp2),y=var.means[seq(1+(add.labels == "right")* (n.grp1),n.grp2 * n.grp1,(n.grp1))],name=v2.labels)} text(text.values$x,text.values$y,text.values$name) } else { text.values <- data.frame(x=rep(.5 + (add.labels=="right")*NCOL(var.means)),y=var.means[,NCOL(var.means)],name=v2.labels ,position=pos) # text.values <- data.frame(x=rep((.5+ (add.labels == "right")* x.values[n.grp1]),n.grp2),y=var.means[,1+(add.labels == "right")* (n.grp1-1) ],name=v2.labels,position=pos) text(text.values$x,text.values$y,text.values$name,pos=text.values$position) } } if(legend > 0 ){ if(!is.null(labels)) {lab <- labels} else {lab <- paste("V",1:n.grp2,sep="")} if ((nvar > 1) & (n.grp2 ==1)) { legend(legend.location[legend], legend=lab, col = colors[(0: nvar)%% n.color + 1],pch=pch[1: n.grp2], text.col = "green4", lty = lty[1:nvar], merge = TRUE, bg = 'gray90')} else { legend(legend.location[legend],legend= lab, col = colors[(0: n.grp2) %% n.color + 1],pch=pch[1: n.grp2], text.col = "green4", lty = lty[1:n.grp2], merge = TRUE, bg = 'gray90') } } } invisible(group.stats) } #corrected Feb 2, 2011 to plot alpha/2 rather than alpha #modifed Feb 2, 2011 to not plot lines if they are not desired. #modified May 21, 2016 to handle a case of a single vector having no columns #modified April 9, 2019 to include v.labels for plots #modified July 11, 2020 to allow formula input #modified Novemember, 2020 to handle NULL sample sizes #further modified November 2020 to properly label the lines and fix the pch to match the legend #And yet some more, December 31st to better handle non-=categorial variables #8/27/23 added the ability to plot v.labels for both by.var=TRUE and by.var=FALSEpsych/R/print.psych.vss.R0000644000176200001440000000677714444406766015057 0ustar liggesusers"print_psych.vss" <- function(x,digits=2,all=FALSE,cut=NULL,sort=FALSE,...) { if(!is.null(x$cfit.1)) { if(x$title!="Very Simple Structure") { cat("\nVery Simple Structure of ", x$title,"\n") } else {cat("\nVery Simple Structure\n")} cat("Call: ") print(x$call) vss.max <- round(max(x$cfit.1) ,digits) wh.max <- which.max(x$cfit.1) if(wh.max !=first.max(x$vss.stats$cfit.1)) {cat("Although the VSS complexity 1 shows ",wh.max," factors, it is probably more reasonable to think about ",first.max(x$vss.stats$cfit.1), " factors\n")} else { cat("VSS complexity 1 achieves a maximimum of ") cat(vss.max," with " ,which.max(x$cfit.1), " factors\n") } cat("VSS complexity 2 achieves a maximimum of ") vss.max <- round(max(x$cfit.2) ,digits) cat(vss.max," with " ,which.max(x$cfit.2), " factors\n") cat("\nThe Velicer MAP achieves a minimum of ") vss.map <- round(min(x$map,na.rm=TRUE) ,digits) cat(vss.map," with " ,which.min(x$map), " factors ") bic.min <- round(min(x$vss.stats[,"BIC"],na.rm=TRUE),digits) cat("\nBIC achieves a minimum of ", bic.min," with ", which.min(x$vss.stats[,"BIC"])," factors\n") sabic.min <- round(min(x$vss.stats[,"SABIC"],na.rm=TRUE),digits) cat("Sample Size adjusted BIC achieves a minimum of ", sabic.min," with ", which.min(x$vss.stats[,"SABIC"])," factors\n") # cat("\nVelicer MAP\n") # print(round(x$map,digits)) # cat("\nVery Simple Structure Complexity 1\n") # print(round(x$cfit.1,digits)) # cat("\nVery Simple Structure Complexity 2\n") # print(round(x$cfit.2,digits)) temp <- data.frame(vss1=x$cfit.1,vss2=x$cfit.2,map=x$map,x$vss.stats[,1:13]) cat("\nStatistics by number of factors \n") print(temp,digits=digits) } else { if(x$title!="Number of Factors") { cat("\nNumber of factors of ", x$title,"\n") } else {cat("\nNumber of factors\n")} cat("Call: ") print(x$call) cat("VSS complexity 1 achieves a maximimum of ") vss.max <- round(max(x$vss.stats$cfit.1,na.rm=TRUE) ,digits) wh.max <- which.max(x$vss.stats$cfit.1) if(wh.max !=first.max(x$vss.stats$cfit.1)) {cat("Although the vss.max shows ",wh.max," factors, it is probably more reasonable to think about ",first.max(x$vss.stats$cfit.1), " factors\n")} else { cat(vss.max," with " ,wh.max, " factors\n") } cat("VSS complexity 2 achieves a maximimum of ") vss.max <- round(max(x$vss.stats$cfit.2,na.rm=TRUE) ,digits) cat(vss.max," with " ,which.max(x$vss.stats$cfit.2), " factors\n") cat("The Velicer MAP achieves a minimum of ") vss.map <- round(min(x$map,na.rm=TRUE) ,digits) cat(vss.map," with " ,which.min(x$map), " factors ") bic.min <- round(min(x$vss.stats[["eBIC"]],na.rm=TRUE),digits) cat("\nEmpirical BIC achieves a minimum of ", bic.min," with ", which.min(x$vss.stats[["eBIC"]])," factors\n") sabic.min <- round(min(x$vss.stats[["SABIC"]],na.rm=TRUE),digits) cat("Sample Size adjusted BIC achieves a minimum of ", sabic.min," with ", which.min(x$vss.stats[["SABIC"]])," factors\n") # cat("\nVelicer MAP\n") # print(round(x$map,digits)) # cat("\nVery Simple Structure Complexity 1\n") # print(round(x$cfit.1,digits)) # cat("\nVery Simple Structure Complexity 2\n") # print(round(x$cfit.2,digits)) temp <- data.frame(vss1=x$vss.stats$cfit.1,vss2=x$vss.stats$cfit.2,map=x$map,x$vss.stats[,1:13]) cat("\nStatistics by number of factors \n") print(temp,digits=digits)} } #end print.psych.vss "first.max" <- function(x) { nx <- length(x) -1 first <- which.max(x) for(i in 1:nx) {if(x[i] > x[i+1]){first <- i break } } return(first) }psych/R/fa.sapa.R0000644000176200001440000001650613327623124013250 0ustar liggesusers"fa.sapa" <- function(r,nfactors=1,n.obs = NA,n.iter=1,rotate="oblimin",scores="regression", residuals=FALSE,SMC=TRUE,covar=FALSE,missing=FALSE,impute="median", min.err = .001,max.iter=50,symmetric=TRUE,warnings=TRUE,fm="minres",alpha=.1, p =.05,oblique.scores=FALSE,np.obs=NULL,use="pairwise",cor="cor",correct=.5,weight=NULL,frac=.1,...) { cl <- match.call() if(isCorrelation(r)) {if(is.na(n.obs) && (n.iter >1)) stop("You must specify the number of subjects if giving a correlation matrix and doing confidence intervals") # if(!require(MASS)) stop("You must have MASS installed to simulate data from a correlation matrix") } f <- fac(r=r,nfactors=nfactors,n.obs=n.obs,rotate=rotate,scores=scores,residuals=residuals,SMC = SMC,covar=covar,missing=missing,impute=impute,min.err=min.err,max.iter=max.iter,symmetric=symmetric,warnings=warnings,fm=fm,alpha=alpha,oblique.scores=oblique.scores,np.obs=np.obs,use=use,cor=cor, correct=.5,weight=weight,...=...) #call fa with the appropriate parameters fl <- f$loadings #this is the original #f1 <- fa.sort(f1) #put them into echelon form But this does not work with target.rot # if(!require(parallel)) {message("Parallels is required to do confidence intervals")} nvar <- dim(fl)[1] if(n.iter > 1) { if(is.na(n.obs) ) {n.obs <- f$n.obs} replicates <- list() rep.rots <- list() #using cor="tet" seems to lead to an error being thrown in factoring, which in turn hangs mclapply if(cor!="tet") {replicateslist <- parallel::mclapply(1:n.iter,function(x) { #replicateslist <- lapply(1:n.iter,function(x) { if(isCorrelation(r)) {#create data sampled from multivariate normal with observed correlation mu <- rep(0, nvar) #X <- mvrnorm(n = n.obs, mu, Sigma = r, tol = 1e-06, empirical = FALSE) #the next 3 lines replaces mvrnorm (taken from mvrnorm, but without the checks) eX <- eigen(r) X <- matrix(rnorm(nvar * n.obs),n.obs) X <- t(eX$vectors %*% diag(sqrt(pmax(eX$values, 0)), nvar) %*% t(X)) } else { X <- r[sample(n.obs,n.obs*frac,replace=FALSE),]} fs <- fac(X,nfactors=nfactors,rotate=rotate,scores="none",SMC = SMC,missing=missing,impute=impute,min.err=min.err,max.iter=max.iter,symmetric=symmetric,warnings=warnings,fm=fm,alpha=alpha,oblique.scores=oblique.scores,np.obs=np.obs,use=use,cor=cor,correct=correct,...=...) #call fa with the appropriate parameters if(nfactors == 1) { npairs <- pairwiseCount(X,diagonal=FALSE) mean.npairs <- mean(npairs,na.rm=TRUE) replicates <- list(loadings=fs$loadings,npairs=mean.npairs) } else { t.rot <- target.rot(fs$loadings,fl) npairs <- pairwiseCount(X,diagonal=FALSE) mean.npairs <- mean(npairs,na.rm=TRUE) if(!is.null(fs$Phi)) { phis <- fs$Phi # should we rotate the simulated factor correlations? #we should report the target rotated phis, not the untarget rotated phis replicates <- list(loadings=t.rot$loadings,phis=phis[lower.tri(t.rot$Phi)],npairs=mean.npairs) #corrected 6/10/15 #replicates <- list(loadings=t.rot$loadings,phis=phis[lower.tri(phis)]) } else {replicates <- list(loadings=t.rot$loadings)} }}) } else {replicateslist <- lapply(1:n.iter,function(x) { #avoiding parallel for this case if(isCorrelation(r)) {#create data sampled from multivariate normal with observed correlation mu <- rep(0, nvar) #X <- mvrnorm(n = n.obs, mu, Sigma = r, tol = 1e-06, empirical = FALSE) #the next 3 lines replaces mvrnorm (taken from mvrnorm, but without the checks) eX <- eigen(r) X <- matrix(rnorm(nvar * n.obs),n.obs) X <- t(eX$vectors %*% diag(sqrt(pmax(eX$values, 0)), nvar) %*% t(X)) } else {X <- r[sample(n.obs,n.obs*frac,replace=FALSE),]} fs <- fac(X,nfactors=nfactors,rotate=rotate,scores="none",SMC = SMC,missing=missing,impute=impute,min.err=min.err,max.iter=max.iter,symmetric=symmetric,warnings=warnings,fm=fm,alpha=alpha,oblique.scores=oblique.scores,np.obs=np.obs,use=use,cor=cor,correct=correct,...=...) #call fa with the appropriate parameters if(nfactors == 1) {replicates <- list(loadings=fs$loadings)} else { t.rot <- target.rot(fs$loadings,fl) npairs <- pairwiseCount(X,diagonal=FALSE) mean.npairs <- mean(npairs,na.rm=TRUE) if(!is.null(fs$Phi)) { phis <- fs$Phi # should we rotate the simulated factor correlations? #we should report the target rotated phis, not the untarget rotated phis replicates <- list(loadings=t.rot$loadings,phis=phis[lower.tri(t.rot$Phi)],npairs=mean.npairs) #corrected 6/10/15 #replicates <- list(loadings=t.rot$loadings,phis=phis[lower.tri(phis)]) } else {replicates <- list(loadings=t.rot$loadings)} }})} replicates <- matrix(unlist(replicateslist),nrow=n.iter,byrow=TRUE) mean.npairs <- mean(replicates[,NCOL(replicates)],na.rm=TRUE) sds.pairs <- sd(replicates[,NCOL(replicates)],na.rm=TRUE) replicates <- replicates[,-NCOL(replicates),drop=FALSE] #drop weird replications (loadings > 1) replicates[abs(replicates) > 1] <- NA means <- colMeans(replicates,na.rm=TRUE) sds <- apply(replicates,2,sd,na.rm=TRUE) if(length(means) > (nvar * nfactors ) ) { means.rot <- means[(nvar*nfactors +1):length(means)] sds.rot <- sds[(nvar*nfactors +1):length(means)] ci.rot.lower <- means.rot + qnorm(p/2) * sds.rot ci.rot.upper <- means.rot + qnorm(1-p/2) * sds.rot ci.rot <- data.frame(lower=ci.rot.lower,upper=ci.rot.upper) } else { rep.rots <- NULL means.rot <- NULL sds.rot <- NULL z.rot <- NULL ci.rot <- NULL } means <- matrix(means[1:(nvar*nfactors)],ncol=nfactors) sds <- matrix(sds[1:(nvar*nfactors)],ncol=nfactors) tci <- abs(means)/sds ptci <- 1-pnorm(tci) if(!is.null(rep.rots)) { tcirot <- abs(means.rot)/sds.rot ptcirot <- 1- pnorm(tcirot)} else {tcirot <- NULL ptcirot <- NULL} ci.lower <- means + qnorm(p/2) * sds ci.upper <- means + qnorm(1-p/2) * sds ci <- data.frame(lower = ci.lower,upper=ci.upper) class(means) <- "loadings" colnames(means) <- colnames(sds) <- colnames(fl) rownames(means) <- rownames(sds) <- rownames(fl) f$cis <- list(means = means,sds = sds,ci = ci,p =2*ptci, means.rot=means.rot,sds.rot=sds.rot,ci.rot=ci.rot,p.rot = ptcirot,Call= cl,replicates=replicates,rep.rots=rep.rots,mean.pair=mean.npairs,sds.pairs=sds.pairs) results <- f results$Call <- cl class(results) <- c("psych","fa.ci") } else {results <- f results$Call <- cl class(results) <- c("psych","fa") } return(results) } #written May 1 2011 #modified May 8, 2014 to make cis an object in f to make sorting easier psych/R/print.psych.stats.R0000644000176200001440000000170614444406736015362 0ustar liggesusers"print_psych.stats" <- function(x,digits=2,all=FALSE,cut=NULL,sort=FALSE,...) { cat("Call: ") print(x$Call) nfactors <- x$factors objective <- x$criteria[1] if(!is.null(objective)) { cat("\nTest of the hypothesis that", nfactors, if (nfactors == 1) "factor is" else "factors are", "sufficient.\n") cat("\nThe degrees of freedom for the model is",x$dof," and the fit was ",round(objective,digits),"\n") if(!is.na(x$n.obs)) {cat("The number of observations was ",x$n.obs, " with Chi Square = ",round(x$STATISTIC,digits), " with prob < ", signif(x$PVAL,digits),"\n")} } cat("\nMeasures of factor score adequacy ",colnames(x$loadings) ) cat("\n Correlation of scores with factors ",round(sqrt(x$R2),digits)) cat("\nMultiple R square of scores with factors " ,round(x$R2,digits)) cat("\nMinimum correlation of factor score estimates ", round(2*x$R2 -1,digits),"\n") } #end of print.psych.stats psych/R/cta.R0000644000176200001440000001530511235401176012477 0ustar liggesusers"cta" <- function(n=3,t=5000, cues = NULL, act=NULL, inhibit=NULL,expect = NULL, consume = NULL,tendency = NULL,tstrength=NULL, type="both",fast=2 ,compare=FALSE,learn=TRUE,reward=NULL) { #simulation of the CTA reparamaterization of the dynamics of action #note, this is all for operation in a constant environment - what is needed is wrap the world around this. #That is, actions actually change cues #this should be a function to do all the work. the rest is just stats and graphics. #MODEL (dif equations from paper) ##here is the basic model tf <- function(tendency,cues,step,expect,act,consume) { tf <- tendency + cues %*% step %*% expect - act %*% step %*% consume } #tendencies at t-t af <- function(act,tendency,step,tstrength,inhibit) { af <- tendency %*% step %*% tstrength + act - act %*% step %*% inhibit} #actions at t-t #the learning function ef <- function(expect,act,step,consume,reward) {if(learn) { which.act <- which.max(act) #counting which act has won if(old.act!=which.act) { diag(temp) <- act %*% reward expect <- expect + temp #learn but only if a new action expect <- expect*1/tr(expect) #standardize expectancies old.act <- which.act}} ef <- expect } ## temp <- matrix(0,n,n) if(n > 4){ colours <- rainbow(n)} else {colours <- c("blue", "red", "black","green") } stepsize <- .05 tendency.start <- tendency act.start <- act expect.start <- expect if(is.null(cues)) {cues <- 2^(n-1:n)} #default cue str - cue vector represents inate strength of stim (cake vs peanut) if(is.null(inhibit)) {inhibit <- matrix(1,ncol=n,nrow=n) #loss matrix .05 on diag 1s off diag diag(inhibit) <- .05} if(is.null(tstrength)) tstrength <- diag(1,n) #if(is.null(tstrength)) tstrength <- diag(c(1,.5,.25)) if(n>1) {colnames(inhibit) <- rownames(inhibit) <- paste("A",1:n,sep="")} if(is.null(consume) ) {consume <- diag(.03,ncol=n,nrow=n) } step <- diag(stepsize,n) #this is the n CUES x k tendencyDENCIES matrix for Cue-tendency excitation weights if(is.null(expect)) expect <- diag(1,n) # a matrix of expectancies that cues lead to outcomes #first run for time= t to find the maximum values to make nice plots as well as to get the summary stats if (is.null(tendency.start)) {tendency <- rep(0,n)} else {tendency <- tendency.start} #default tendency = 0 if(is.null(act.start) ) {act <- cues} else {act <- act.start} #default actions = initial cues if(is.null(reward)) {reward <- matrix(0,n,n) diag(reward) <- c(rep(0,n-1),.05) } else {temp1 <- reward reward <- matrix(0,n,n) diag(reward) <- temp1 } #set up counters maxact <- minact <- mintendency <- maxtendency <- 0 counts <- rep(0,n) transitions <- matrix(0,ncol=n,nrow=n) frequency <- matrix(0,ncol=n,nrow=n) colnames(frequency) <- paste("T",1:n,sep="") rownames(frequency) <- paste("F",1:n,sep="") old.act <- which.max(act) #MODEL (dif equations from paper) for (i in 1:t) { tendency <- tf(tendency,cues,step,expect,act,consume) act <- af (act,tendency,step,tstrength,inhibit) act[act<0] <- 0 #add learning expect <- ef(expect,act,step,consume,reward) #END OF MODEL #STATS #calc max/min act/tendency maxact <- max(maxact,act) minact <- min(minact,act) maxtendency <- max(maxtendency,tendency) mintendency <- min(mintendency,tendency) #count which.act <- which.max(act) #counting which act has won counts[which.act] <- counts[which.act]+1 #time table transitions[old.act,which.act] <- transitions[old.act,which.act] + 1 #frequency table if(old.act!=which.act) { frequency[old.act,which.act] <- frequency[old.act,which.act] + 1 frequency[which.act,which.act] <- frequency[which.act,which.act] +1 } #learn but only if a new action old.act <- which.act } #PLOTS #now do various types of plots, depending upon the type of plot desired plots <- 1 action <- FALSE #state diagrams plot two tendencydencies agaist each other over time if (type!="none") {if (type=="state") { op <- par(mfrow=c(1,1)) if (is.null(tendency.start)) {tendency <- rep(0,n)} else {tendency <- tendency.start} if(is.null(act.start) ) {act <- cues} else {act <- act.start} plot(tendency[1],tendency[2],xlim=c(mintendency,maxtendency),ylim=c(mintendency,maxtendency),col="black", main="State diagram",xlab="tendency 1", ylab="tendency 2") for (i in 1:t) { tendency <- tf(tendency,cues,step,expect,act,consume) act <- af (act,tendency,step,tstrength,inhibit) #expect <- ef(expect,act,step,consume) act[act<0] <- 0 if(!(i %% fast)) points(tendency[1],tendency[2],col="black",pch=20,cex=.2) } } else { #the basic default is to plot action tendencydencies and actions in a two up graph if(type=="both") {if(compare) {op <- par(mfrow=c(2,2))} else {op <- par(mfrow=c(2,1))} plots <- 2 } else {op <- par(mfrow=c(1,1))} if (type=="action") {action <- TRUE} else {if(type=="tendencyd" ) action <- FALSE} for (k in 1:plots) { if (is.null(tendency.start)) {tendency <- rep(0,n)} else {tendency <- tendency.start} if(is.null(act.start) ) {act <- cues} else {act <- act.start} if(is.null(expect.start)) {expect <- diag(1,n)} else {expect <- expect.start} # a matrix of expectancies that cues lead to outcomes if(action ) plot(rep(1,n),act,xlim=c(0,t),ylim=c(minact,maxact),xlab="time",ylab="action", main="Actions over time") else plot(rep(1,n),tendency,xlim=c(0,t),ylim=c(mintendency,maxtendency),xlab="time",ylab="action tendency",main="Action tendencies over time") for (i in 1:t) { tendency <- tf(tendency,cues,step,expect,act,consume) act <- af (act,tendency,step,tstrength,inhibit) act[act<0] <- 0 ### maxact <- max(maxact,act) minact <- min(minact,act) maxtendency <- max(maxtendency,tendency) mintendency <- min(mintendency,tendency) #count which.act <- which.max(act) #counting which act has won counts[which.act] <- counts[which.act]+1 #time table transitions[old.act,which.act] <- transitions[old.act,which.act] + 1 #frequency table if(old.act!=which.act) { frequency[old.act,which.act] <- frequency[old.act,which.act] + 1 #frequency[which.act,which.act] <- frequency[which.act,which.act] +1 expect <- ef(expect,act,step,consume,reward) } #learn but only if a new action old.act <- which.act ## if(!(i %% fast) ) {if( action) points(rep(i,n),act,col=colours,cex=.2) else points(rep(i,n),tendency,col=colours,cex=.2) }} action <- TRUE} } } results <- list(cues=cues,expectancy=expect,strength=tstrength,inihibition=inhibit,consumation=consume,reinforcement=reward, time = counts,frequency=frequency, tendency=tendency, act=act) return(results) }psych/R/mixed.cor.R0000644000176200001440000002673713237415017013636 0ustar liggesusers #mixedCor was added April 28, 2017 to make mixed.cor easier to use "mixed.cor" <- function(x=NULL,p=NULL,d=NULL,smooth=TRUE,correct=.5,global=TRUE,ncat=8,use="pairwise",method="pearson",weight=NULL) { cat("\nmixed.cor is deprecated, please use mixedCor.") mixedCor(data=x,c=NULL,p=p,d=d,smooth=smooth,correct=correct,global=global,ncat=ncat,use=use,method=method,weight=weight) } "mixedCor" <- function(data=NULL,c=NULL,p=NULL,d=NULL,smooth=TRUE,correct=.5,global=TRUE,ncat=8,use="pairwise",method="pearson",weight=NULL) { cl <- match.call() original <- colnames(data) organize <- FALSE #the default is to specify the continuous, the polytomous and the dichotomous if((missing(c) | is.null(c)) && (missing(p) | is.null(p)) && (missing(d) | is.null(d))) { #figure out which kinds of variables we are using organize <- TRUE nvar <- ncol(data) data <- as.matrix(data) #to speed things up #first, check for data coded as factors, these mess up. We should quit with warning #also check for variables with no variance and flag them (don't remove, just quit. Likely problem with data) progressBar(nvar,nvar,"Preparing the data") ans <- matrix(NA,nrow=nvar,ncol=2) for (i in 1:nvar) { if (is.numeric(data[,i])) {ans[i,2] <- 1 } else { if ((is.factor(data[,i])) || (is.logical(data[,i]))) { ans[i,2] <- 2 } else { if (is.character(data[,i])) { ans[i,2] <- 3 } else {ans[i,2] <- 4} } } ans[i,1] <- sd(data[,i],na.rm=TRUE) } if(any(ans[,2] !=1)) {cat("\nNot all of the variables are numeric. Please check your data. \nPotential bad items are ") print(which(ans[,2] !=1)) stop ("\nI am stopping because of the problem with the data.") } bad <- which(ans[,1] ==0) if(length(bad) > 0) {cat("\nSome of the variables have no variance. Please remove them and try again.\nBad items are ") print(bad) stop("\nI am stopping because of the problems with the data.") } tab <- apply(data,2,table) if(is.list(tab)) {len <- lapply(tab,length)} else {len <- dim(tab)[1] } dvars <- subset(1:nvar,len==2) #find the dichotomous variable by number pvars <- subset(1:nvar,((len > 2) & (len <= ncat))) #find the polytomous variables cvars <- subset(1:nvar,(len > ncat)) #find the continuous variables (more than ncat levels) #This next part is not as efficient as it should be, because it is making up 3 new matrices, where we could just do it by reference -- pass the names, not the data #if(length(dvars) > 0) {d <- as.matrix(x[,dvars],ncol=length(dvars)) # colnames(d) <- colnames(x)[dvars]} else {d <- NULL} if(length(pvars) > 0) {#p <- as.matrix(x[,pvars],ncol=length(pvars)) # colnames(p) <- colnames(x)[pvars] # tab <- table(p) #now check to make sure that they are all on the same scale # if(length(tab) > ncat) stop("I tried to figure out which were continuous and which were polytomous, but failed. Please try again by specifying x, p, and d.") tab <- apply(data[,pvars,drop=FALSE],2,table) #if all elements have the same number of categories, this is a matrix, otherwise, it is a list if(is.list(tab)) { ok <- apply(data[,pvars,drop=FALSE], 2,function (x) {if (length(table(x)) != (max(x,na.rm=TRUE) - min(x,na.rm=TRUE)+1)) {FALSE} else {TRUE}}) if(any(!ok)) {bad <- which(!ok) cat("\n Some polytomous variables have fewer categories than they should. Please check your data. \nPotential bad items are ",colnames(p)[bad],"\n") stop("\nI am stopping because of the problem with polytomous data") } } } else {p <- NULL} # if(length(cvars) > 0) {cont <- matrix(x[,cvars],ncol=length(cvars)) # colnames(cont) <- colnames(x)[cvars] } else {cont <- NULL} #Rho <- mixed.cor1(cont,p, d,smooth=smooth,global=global,correct=correct,use=use,method=method,weight=weight) progressBar(nvar,nvar,"Starting mixed.cor1") Rho <- mixed.cor1(data,cvars,pvars, dvars,smooth=smooth,global=global,correct=correct,use=use,method=method,weight=weight) oldorder <- c(cvars,pvars,dvars) ord <- order(oldorder) Rho$rho <- Rho$rho[ord,ord] } else {# organization is specified #if ((p+ d) == nvar) #if(!missing(c)) x <- data[c] #if(!missing(p)) p <- data[p] #if(!missing(d)) d <- data[d] progressBar(1,1,"Starting mixed.cor1") Rho <- mixed.cor1(data,c=c,p=p,d=d,smooth=smooth,global=global,correct=correct,use=use,method=method,weight=weight)} orig <- original %in% colnames(Rho$rho) orig <- original[orig] Rho$rho <- Rho$rho[orig,orig] #organize them in the way they came in Rho$Call <- cl return(Rho) } #modified April 29th (2014) to get around the problem of missing rows or columns in the polychoric function. #modified 1/1/14 to add multicore capability #deprecated as of 02/05/18 # "mixed.cor" <- # function(x=NULL,p=NULL,d=NULL,smooth=TRUE,correct=.5,global=TRUE,ncat=8,use="pairwise",method="pearson",weight=NULL) { # cl <- match.call() # organize <- FALSE #the default is to specify the continuous, the polytomous and the dichotomous # if(!is.null(x) && is.null(p) && is.null(d)) { #figure out which kinds of variables we are using # organize <- TRUE # nvar <- ncol(x) # x <- as.matrix(x) # tab <- apply(x,2,function(x) table(x)) # if(is.list(tab)) {len <- lapply(tab,function(x) length(x))} else {len <- dim(tab)[1] } # dvars <- subset(1:nvar,len==2) #find the dichotomous variables # pvars <- subset(1:nvar,((len > 2) & (len <= ncat))) #find the polytomous variables # cvars <- subset(1:nvar,(len > ncat)) #find the continuous variables (more than ncat levels) # # if(length(dvars) > 0) {d <- as.matrix(x[,dvars],ncol=length(dvars)) # colnames(d) <- colnames(x)[dvars]} else {d <- NULL} # if(length(pvars) > 0) {p <- as.matrix(x[,pvars],ncol=length(pvars)) # colnames(p) <- colnames(x)[pvars] # tab <- table(p) #now check to make sure that they are all on the same scale # if(length(tab) > ncat) stop("I tried to figure out which were continuous and which were polytomous, but failed. Please try again by specifying x, p, and d.") # ok <- apply(p, 2,function (x) {if (length(table(x)) != (max(x,na.rm=TRUE) - min(x,na.rm=TRUE)+1)) {FALSE} else {TRUE}}) # if(any(!ok)) {bad <- which(!ok) # cat("\n Some polytomous variables have fewer categories than they should. Please check your data. \nPotential bad items are ",colnames(p)[bad],"\n") # # stop("\nI am stopping because of the problem with polytomous data") # } # } else {p <- NULL} # if(length(cvars) > 0) {cont <- matrix(x[,cvars],ncol=length(cvars))tab <- apply(x,2,table) # if(is.list(tab)) {len <- lapply(tab,length)} else {len <- dim(tab)[1] } # dvars <- subset(1:nvar,len==2) #find the dichotomous variable by number # pvars <- subset(1:nvar,((len > 2) & (len <= ncat))) #find the polytomous variables # cvars <- subset(1:nvar,(len > ncat)) #find the continuous variables (more than ncat levels) # colnames(cont) <- colnames(x)[cvars] } else {cont <- NULL} # # Rho <- mixed.cor1(cont,p, d,smooth=smooth,global=global,correct=correct,use=use,method=method,weight=weight) # oldorder <- c(cvars,pvars,dvars) # ord <- order(oldorder) # Rho$rho <- Rho$rho[ord,ord] # } else {# organization is specified # #if ((p+ d) == nvar) # Rho <- mixed.cor1(x=x,p=p,d=d,smooth=smooth,global=global,correct=correct,use=use,method=method,weight=weight)} # # Rho$Call <- cl # return(Rho) # } #December 22,2010 #revised July 15, 2011 to work for the various special cases #meant to combine continuous, polytomous and dichotomous correlations #revised October 12, 2011 to get around the sd of vectors problem #revised Sept 10, 2013 to allow for dichotomies with different minima "mixed.cor1" <- function(data,c=NULL,p=NULL,d=NULL,smooth=TRUE,global=TRUE,correct=correct,use=use,method=method,weight=NULL) { cl <- match.call() x <- c #the continuous variables # if(!is.null(x)) {nx <- dim(x)[2]} else {nx <- 0} # if(!is.null(p)) {np <- dim(p)[2]} else {np <- 0} # if(!is.null(d)) {nd <- dim(d)[2]} else {nd <- 0} # if(is.null(nx)) nx <- 1 # if(is.null(np)) np <- 1 # if(is.null(nd)) nd <- 1 if(!is.null(x)) {nx <- length(x)} else {nx <- 0} if(!is.null(p)) {np <- length(p)} else {np <- 0} if(!is.null(d)) {nd <-length(d)} else {nd <- 0} npd <- nx +np + nd rpd <- NULL #in case we don't have any #check to make sure all the data are ok for doing the appropriate analyses #first check for polychorics #but we did this before in mixedCor. If the person specified the p and d, assume they are correct # if(np > 0) { ptable <- table(as.matrix(data[,p])) #haven't we already done this? # nvalues <- length(ptable) #find the number of response alternatives # if(nvalues > 8) stop("You have more than 8 categories for your items, polychoric is probably not needed")} # # #now test for tetrachorics # if(nd > 0) { # dm <- apply(data[,d,drop=FALSE],2,function(x) min(x,na.rm=TRUE)) # data[,d] <- t(t(data[,d]) - dm) # #d <- d -min(d,na.rm=TRUE) #in case the numbers are not 0,1 This requires them all to be the same # if(max(data[,d,drop=FALSE],na.rm=TRUE) > 1) {stop("Tetrachoric correlations require dichotomous data")}} if(nx > 1) { progressBar(nx,nx,"Finding Pearson correlations") rx <- cor(data[,x],use=use,method=method)} else {if(nx < 1) {rx <- NULL rho <- NULL} else {rx <- 1 rho <- 1}} if(np > 1) { #cat("\n Starting to find the polychoric correlations") progressBar(np,np,"Finding polychorics") rp <- polychoric(data[,p],smooth=smooth,global=global,weight=weight,correct=correct)} else {if (np == 1) { rho <- 1 names(rho) <- colnames(p) rp <- list(rho=rho,tau=NULL)} else {rp <- list(rho= NULL,tau=NULL)}} if(nd > 1) { # cat("n Starting to find the tetrachoric correlations\n\n") progressBar(nd,nd,"Finding tetrachorics") rd <- tetrachoric(data[,d],smooth=smooth,correct=correct,weight=weight)} else {if (nd == 1) {rd <- list(rho=1,tau=NULL)} else {rd <- list(rho=NULL,tau=NULL)}} if(nx > 0) {if(np > 0) {rxp <- polyserial(data[,x,drop=FALSE],data[,p,drop=FALSE]) #the normal case is for all three to exist tmixed <- cbind(rx,t(rxp)) lmixed <- cbind(rxp,rp$rho) rho <- rbind(tmixed,lmixed)} else {rho <- rx} #we now have x and p if(nd > 0) { rxd <- biserial(data[,x,drop=FALSE],data[,d,drop=FALSE]) if(np > 0) { rpd <- polydi(data[,p,drop=FALSE],data[,d,drop=FALSE],global=global,correct=correct)$rho #passing the global value (July 10, 2017) topright <- t(cbind(rxd,t(rpd))) } else { topright <- t(rxd)} tmixed <- cbind(rho,topright) lmixed <- cbind(t(topright),rd$rho) rho <- rbind(tmixed,lmixed) } } else { #the case of nx =0 if( np > 0) { if (nd > 0 ) { progressBar(nd,nd,"Starting polydi") rpd <- polydi(data[,p,drop=FALSE],data[,d,drop=FALSE],global=global,correct=correct)$rho #added global (July 10, 2017) But this causes problems tmixed <- cbind(rp$rho,rpd) lmixed <- cbind(t(rpd),rd$rho) rho <- rbind(tmixed,lmixed) } else {rho <- rp$rho} } else { rho <- rd$rho} } colnames(rho) <- rownames(rho) class(rho) <- c("psych","mixed") if(!is.null(rx)) class(rx) <- c("psych","mixed") mixed <- list(rho=rho,rx=rx,poly=rp,tetra=rd,rpd=rpd,Call=cl) class(mixed) <- c("psych","mixed") return(mixed) } psych/R/structure.diagram.R0000644000176200001440000013620414524557625015414 0ustar liggesusers#Created September 25, 2009 #modified November 28, 2009 to allow top down as well as left right (default) #based upon structure graph but not using Rgraphviz #creates a structural equation path diagram, draws it, and saves sem commands #modified again in December, 2009 to add Rx, Ry options #modified to produce correct sem and lavaan code October 2018 #modified 11/14/18 to do mimic models "structure.diagram" <- function(fx=NULL,Phi=NULL,fy=NULL,labels=NULL,cut=.3,errors=FALSE,simple=TRUE,regression=FALSE,lr=TRUE,Rx=NULL,Ry=NULL, digits=1,e.size=.1,main="Structural model", ...){ #first some default values xmodel <- fx ymodel <- fy num.y <- num.x <- 0 #we assume there is nothing there if(!is.null(fx) ) { #this is the normal case e.size <- e.size*8/(NROW(fx)) #check if input is from a factor analysis or omega analysis if(!is.null(class(xmodel)) && (length(class(xmodel))>1)) { if((inherits(xmodel,"psych") && inherits(xmodel, "omega"))) { Phi <- xmodel$schmid$phi xmodel <- xmodel$schmid$oblique} else { if((inherits(xmodel,"psych") && ((inherits(xmodel,"fa") | (inherits(xmodel,"principal")))))) { if(!is.null(xmodel$Phi)) Phi <- xmodel$Phi xmodel <- as.matrix(xmodel$loadings)} }} else { if(!is.matrix(xmodel) & !is.data.frame(xmodel) &!is.vector(xmodel)) { if(!is.null(xmodel$Phi)) Phi <- xmodel$Phi xmodel <- as.matrix(xmodel$loadings) } else {xmodel <- xmodel} } #first some basic setup parameters -- these just convert the various types of input if(!is.matrix(xmodel) ) {factors <- as.matrix(xmodel)} else {factors <- xmodel} num.var <- num.xvar <- dim(factors)[1] #how many x variables? if (is.null(num.xvar) ){num.xvar <- length(factors) num.xfactors <- 1} else { num.factors <- num.xfactors <- dim(factors)[2]} if(is.null(labels)) {vars <- xvars <- rownames(xmodel)} else { xvars <- vars <- labels} if(is.null(vars) ) {vars <- xvars <- paste("x",1:num.xvar,sep="") } fact <- colnames(xmodel) if (is.null(fact) ) { fact <- paste("X",1:num.xfactors,sep="") } if(is.numeric(factors)) {factors <- round(factors,digits) } } else {#fx is NULL This is for the case where we want to do some fancy graphics of sems num.xvar <- dim(Rx)[1] if(is.null(num.xvar)) num.xvar <- 0 num.xfactors <- 0 num.yfactors <- 0 num.factors <- 0 fact <- NULL if(is.null(labels)) {vars <- xvars <- rownames(Rx)} else { xvars <- vars <- labels} } num.yfactors <- 0 num.yvar <- 0 if (!is.null(ymodel)) { if(is.list(ymodel) & !is.data.frame(ymodel) ) {ymodel <- as.matrix(ymodel$loadings)} else {ymodel <- ymodel} if(!is.matrix(ymodel) ) {y.factors <- as.matrix(ymodel)} else {y.factors <- ymodel} num.y <- dim(y.factors)[1] if (is.null(num.y)) { num.y <- length(ymodel) num.yfactors <- 1} else { num.yfactors <- dim(y.factors)[2] } num.yvar <- num.y yvars <- rownames(ymodel) if(is.null(yvars)) {yvars <- paste("y",1:num.y,sep="") } if(is.null(labels)) {vars <- c(xvars,yvars)} else {yvars <- labels[(num.xvar+1):(num.xvar+num.y)]} yfact <- colnames(ymodel) if(is.null(yfact)) {yfact <- paste("Y",1:num.yfactors,sep="") } fact <- c(fact,yfact) if(is.numeric(y.factors)) {y.factors <- round(y.factors,digits) } }#end of if(null(y.model)) if(!is.null(Ry)& is.null(ymodel)) {num.yvar <- num.y <- dim(Ry)[1] yvars <- colnames(Ry)} #do we want to draw the inter Y correlations? num.var <- num.xvar + num.y if((num.xfactors > 0 ) & (num.yfactors > 0) & is.null(Phi)) {mimic <- TRUE num.factors <- max(num.xfactors,num.yfactors)} else {mimic <- FALSE num.factors <- num.xfactors + num.yfactors} sem <- matrix(rep(NA),6*(num.var*num.factors + num.factors),ncol=3) #this creates an output model for sem analysis lavaan <- vector("list",num.xfactors + num.yfactors) #create a list for lavaan colnames(sem) <- c("Path","Parameter","Value") var.rect <- list() fact.rect <- list() if(is.numeric(Phi) ) { Phi <- round(Phi,digits)} if(!is.null(Rx)) {x.curves <- 2 if(is.numeric(Rx) ) { Rx <- round(Rx,digits)}} else {x.curves <- 0 } if(!is.null(Ry)) {y.curves <- 3 if(is.numeric(Ry) ) { Ry <- round(Ry,digits)}} else {y.curves <- 0} ## now do the basic scaling of the figure ###create the basic figure ### length.labels <- 0 # a filler for now #plot.new() is necessary if we have not plotted before #strwd <- try(strwidth(xvars),silent=TRUE) strwd <- try(length.labels <- max(strwidth(xvars),strwidth("abc"))/1.8,silent=TRUE) #although this throws an error if the window is not already open, we don't show it #if (class(strwd) == "try-error" ) {plot.new() } if (inherits(strwd, "try-error" )) {length.labels = max(nchar(xvars),3)/1.8 } #length.labels <- max(strwidth(xvars),strwidth("abc"))/1.8 if(lr) {limx <- c(-(length.labels + x.curves+ errors/4),max(num.xvar,num.yvar)+2 + y.curves) limy <- c(0,max(num.xvar,num.yvar)+1) } else { limy <- c(-(length.labels +x.curves),max(num.xvar,num.yvar) +2 + y.curves+errors) limx <- c(0,max(num.xvar,num.yvar)+1) # if( errors) limy <- c(-1,max(num.xvar,num.yvar)+2) } scale.xaxis <- 3 #max(num.xvar +1,num.yvar+1)/(num.xfactors+1) if(lr) {plot(0,type="n",xlim=limx,ylim=limy,frame.plot=FALSE,axes=FALSE,ylab="",xlab="",main=main)} else {plot(0,type="n",xlim=limx,ylim=limy,frame.plot=FALSE,axes=FALSE,ylab="",xlab="",main=main) } #now draw the x part #we want to center the x factors on the left side #this requires adding an adjustment if there are more y variables than x variables. #do not draw any x variable if fx is not specified x.adj <- max(0,num.yvar - num.xvar)/2 k <- num.factors x.scale <- max(num.xvar +1,num.yvar+1)/(num.xvar+1) if(num.xvar > 0) { #the normal case for (v in 1:num.xvar) { if(lr) { var.rect[[v]] <- dia.rect(0,(num.xvar-v+1)*x.scale ,xvars[v],xlim=limx,ylim=limy,...) } else { var.rect[[v]] <- dia.rect(v*x.scale,0,xvars[v],xlim=limy,ylim=limx,...) } } nvar <- num.xvar if(mimic) { f.scale <- limy[2]/(num.xfactors+1) x.adj <- 0} else { f.scale <- max(num.xvar +1,num.yvar+1)/(num.xfactors+1)} if (num.xfactors >0) { for (f in 1:num.xfactors) { if(!regression) {if(lr) {fact.rect[[f]] <- dia.ellipse(limx[2]/scale.xaxis,(num.xfactors+1-f)*f.scale,fact[f],xlim=limx,ylim=limy,e.size=e.size,...)} else {fact.rect[[f]] <- dia.ellipse(f * f.scale ,limy[2]/scale.xaxis,fact[f],ylim=limy,xlim=limx,e.size=e.size,...) } } else {if(lr) {fact.rect[[f]] <- dia.rect(limx[2]/scale.xaxis,(num.xfactors+1-f)*f.scale,fact[f],xlim=c(0,nvar),ylim=c(0,nvar),...)} else { fact.rect[[f]] <- dia.rect(f*f.scale,limy[2]/scale.xaxis,fact[f],xlim=c(0,nvar),ylim=c(0,nvar),...)} } for (v in 1:num.xvar) { if(is.numeric(factors[v,f])) { if(simple && (abs(factors[v,f]) == max(abs(factors[v,])) ) && (abs(factors[v,f]) > cut) | (!simple && (abs(factors[v,f]) > cut))) { if (!regression & !mimic) {if(lr){dia.arrow(from=fact.rect[[f]],to=var.rect[[v]]$right,labels =factors[v,f],col=((sign(factors[v,f])<0) +1),lty=((sign(factors[v,f])<0) +1),adj=(v %% 2)) } else {dia.arrow(from=fact.rect[[f]],to=var.rect[[v]]$top,labels =factors[v,f],col=((sign(factors[v,f])<0) +1),lty=((sign(factors[v,f])<0) +1), adj = (v %% 2)) } } else {dia.arrow(to=fact.rect[[f]]$left,from=var.rect[[v]]$right,labels =factors[v,f], adj = (v %% 2), col=((sign(factors[v,f])<0) +1))} } } else { if (factors[v,f] !="0") { if (!regression & !mimic) { if(lr) {dia.arrow(from=fact.rect[[f]],to=var.rect[[v]]$right,labels =factors[v,f],adj = (v %% 2)) } else {dia.arrow(from=fact.rect[[f]],to=var.rect[[v]]$top,labels =factors[v,f],adj = (v %% 2))} } else {if(lr) {dia.arrow(to=fact.rect[[f]],from=var.rect[[v]]$right,labels =factors[v,f],adj = (v %% 2))} else {dia.arrow(to=fact.rect[[f]],from=var.rect[[v]]$top,labels =factors[v,f],adj = (v %% 2))} } } } } } if (num.xfactors ==1) { lavaan[[1]] <- paste(fact[1],"=~ ") for(i in 1:num.xvar) { sem[i,1] <- paste(fact[1],"->",vars[i],sep="") lavaan[[1]] <- paste0(lavaan[[1]], ' + ', vars[i]) if(is.numeric(factors[i])) {sem[i,2] <- vars[i]} else {sem[i,2] <- factors[i] } }} #end of if num.xfactors ==1 k <- num.xvar+1 k <- 1 for (f in 1:num.xfactors) { #if (!is.numeric(factors[i,f]) || (abs(factors[i,f]) > cut)) lavaan[[f]] <- paste0(fact[f] ," =~ ") for (i in 1:num.xvar) { if((!is.numeric(factors[i,f] ) && (factors[i,f] !="0"))|| ((is.numeric(factors[i,f]) && abs(factors[i,f]) > cut ))) { sem[k,1] <- paste(fact[f],"->",vars[i],sep="") lavaan[[f]] <- paste0(lavaan[[f]], ' + ', vars[i]) if(is.numeric(factors[i,f])) {sem[k,2] <- paste("F",f,vars[i],sep="")} else {sem[k,2] <- factors[i,f]} k <- k+1 } #end of if } } } #end of if num.xfactors > 0 if(errors & !mimic) { for (i in 1:num.xvar) {if(lr) { dia.self(var.rect[[i]],side=2) } else { dia.self(var.rect[[i]],side=1)} sem[k,1] <- paste(vars[i],"<->",vars[i],sep="") sem[k,2] <- paste("x",i,"e",sep="") k <- k+1 } } } else {nvar <- 0} #now, if there is a ymodel, do it for y model if(!is.null(ymodel)| !is.null(Ry)) { if(lr) { y.adj <- min(0,(num.yvar/2 - num.xvar/2)) f.yscale <- limy[2]/(num.yfactors+1) y.fadj <- 0} else { y.adj <- num.xvar/2 - num.yvar/2 f.yscale <- limx[2]/(num.yfactors+1) y.fadj <- 0} y.scale <- max(num.xvar +1,num.yvar+1)/(num.yvar+1) for (v in 1:num.yvar) { if(lr){ var.rect[[v+num.xvar]] <- dia.rect(limx[2]-y.curves-errors/2,limy[2]-v + y.adj,yvars[v],xlim=limx,ylim=limy,...)} else { var.rect[[v+num.xvar]] <- dia.rect(v * y.scale,limx[2],yvars[v],xlim=limy,ylim=limx,...)} } } #we have drawn the y variables, now should we draw the Y factors if(!is.null(ymodel)){ for (f in 1:num.yfactors) { if(!mimic) {if(lr) { fact.rect[[f+num.xfactors]] <- dia.ellipse(2*limx[2]/scale.xaxis,(num.yfactors+1-f)*f.yscale +y.fadj,yfact[f],xlim=limx,ylim=limy,e.size=e.size,...)} else { fact.rect[[f+num.xfactors]] <- dia.ellipse(f*f.yscale+ y.fadj,2*limx[2]/scale.xaxis,yfact[f],ylim=limy,xlim=limx,e.size=e.size,...)} } else {fact.rect[[f+num.xfactors]] <- fact.rect[[f]] } for (v in 1:num.yvar) {if(is.numeric(y.factors[v,f])) { {if(simple && (abs(y.factors[v,f]) == max(abs(y.factors[v,])) ) && (abs(y.factors[v,f]) > cut) | (!simple && (abs(y.factors[v,f]) > cut))) { if(lr) { dia.arrow(from=fact.rect[[f+num.xfactors]],to=var.rect[[v+num.xvar]]$left,labels =y.factors[v,f],col=((sign(y.factors[v,f])<0) +1),lty=((sign(y.factors[v,f])<0) +1),adj = (v %% 2))} else { dia.arrow(from=fact.rect[[f+num.xfactors]],to=var.rect[[v+num.xvar]]$bottom,labels =y.factors[v,f],col=((sign(y.factors[v,f])<0) +1),lty=((sign(y.factors[v,f])<0) +1),adj = (v %% 2) )} } } } else {if(factors[v,f] !="0") {if(lr) {dia.arrow(from=fact.rect[[f+num.xfactors]],to=var.rect[[v+num.xvar]]$left,labels =y.factors[v,f],adj = (v %% 2)) } else { dia.arrow(from=fact.rect[[f+num.xfactors]],to=var.rect[[v+num.xvar]]$bottom,labels =y.factors[v,f],adj = (v %% 2)) } } }} } if (num.yfactors ==1) { lavaan[[num.xfactors +1 ]] <- paste(fact[num.xfactors +1], "=~") for (i in 1:num.y) { sem[k,1] <- paste(fact[1+num.xfactors],"->",yvars[i],sep="") lavaan[[num.xfactors +1]] <- paste0(lavaan[[num.xfactors +1]], ' + ', yvars[i]) if(is.numeric(y.factors[i] ) ) {sem[k,2] <- paste("Fy",yvars[i],sep="")} else {sem[k,2] <- y.factors[i]} k <- k +1 } } else { #end of if num.yfactors ==1 for (f in 1:num.yfactors) { lavaan[[num.xfactors +f ]] <- paste(fact[num.xfactors +f], "=~") for (i in 1:num.y) { if( (y.factors[i,f] !="0") && (abs(y.factors[i,f]) > cut )) { lavaan[[num.xfactors +f]] <- paste0(lavaan[[num.xfactors +f]], ' + ', yvars[i]) sem[k,1] <- paste(fact[f+num.xfactors],"->",vars[i+num.xvar],sep="") if(is.numeric(y.factors[i,f])) { sem[k,2] <- paste("Fy",f,vars[i+num.xvar],sep="")} else {sem[k,2] <- y.factors[i,f]} k <- k+1 } #end of if } #end of factor } # end of variable loop } #end of else if # } if(errors) { for (i in 1:num.y) { if(lr) {dia.self(var.rect[[i+num.xvar]],side=4) } else {dia.self(var.rect[[i+num.xvar]],side=3)} sem[k,1] <- paste(vars[i+num.xvar],"<->",vars[i+num.xvar],sep="") sem[k,2] <- paste("y",i,"e",sep="") k <- k+1 }} } #end of if.null(ymodel) if(!is.null(Rx)) {#draw the correlations between the x variables for (i in 2:num.xvar) { for (j in 1:(i-1)) { if((!is.numeric(Rx[i,j] ) && ((Rx[i,j] !="0")||(Rx[j,i] !="0")))|| ((is.numeric(Rx[i,j]) && abs(Rx[i,j]) > cut ))) { if (lr) {if(abs(i-j) < 2) { dia.curve(from=var.rect[[j]]$left,to=var.rect[[i]]$left, labels = Rx[i,j],scale=-3*(i-j)/num.xvar)} else { dia.curve(from=var.rect[[j]]$left,to=var.rect[[i]]$left, labels = Rx[i,j],scale=-3*(i-j)/num.xvar)} } else { if(abs(i-j) < 2) { dia.curve(from=var.rect[[j]]$bottom,to=var.rect[[i]]$bottom, labels = Rx[i,j],scale=-3*(i-j)/num.xvar)} else {dia.curve(from=var.rect[[j]]$bottom,to=var.rect[[i]]$bottom, labels = Rx[i,j],scale=-3*(i-j)/num.xvar)} } }} } } if(!is.null(Ry)) {#draw the correlations between the y variables for (i in 2:num.yvar) { for (j in 1:(i-1)) { if((!is.numeric(Ry[i,j] ) && ((Ry[i,j] !="0")||(Ry[j,i] !="0")))|| ((is.numeric(Ry[i,j]) && abs(Ry[i,j]) > cut ))) { if (lr) {if(abs(i-j) < 2) { dia.curve(from=var.rect[[j+num.xvar]]$right,to=var.rect[[i+num.xvar]]$right, labels = Ry[i,j],scale=(i-j)/num.xvar)} else {dia.curve(from=var.rect[[j+num.xvar]]$right,to=var.rect[[i+num.xvar]]$right, labels = Ry[i,j],scale=(i-j)/num.xvar)} } else { if(abs(i-j) < 2) { dia.curve(from=var.rect[[j+num.xvar]]$bottom,to=var.rect[[i+num.xvar]]$bottom, labels = Ry[i,j],scale=(i-j)/num.xvar)} else {dia.curve(from=var.rect[[j+num.xvar]]$bottom,to=var.rect[[i+num.xvar]]$bottom, labels = Ry[i,j],scale=#Created September 25, 2009 #modified November 28, 2009 to allow top down as well as left right (default) #based upon structure graph but not using Rgraphviz #creates a structural equation path diagram, draws it, and saves sem commands #modified again in December, 2009 to add Rx, Ry options #modified to produce correct sem and lavaan code October 2018 #modified 11/14/18 to do mimic models "structure.diagram" <- function(fx=NULL,Phi=NULL,fy=NULL,labels=NULL,cut=.3,errors=FALSE,simple=TRUE,regression=FALSE,lr=TRUE,Rx=NULL,Ry=NULL, digits=1,e.size=.1,main="Structural model", ...){ #first some default values xmodel <- fx ymodel <- fy num.y <- num.x <- 0 #we assume there is nothing there if(!is.null(fx) ) { #this is the normal case e.size <- e.size*8/(NROW(fx)) #check if input is from a factor analysis or omega analysis if(!is.null(class(xmodel)) && (length(class(xmodel))>1)) { if((inherits(xmodel,"psych") && inherits(xmodel, "omega"))) { Phi <- xmodel$schmid$phi xmodel <- xmodel$schmid$oblique} else { if((inherits(xmodel,"psych") && ((inherits(xmodel,"fa") | (inherits(xmodel,"principal")))))) { if(!is.null(xmodel$Phi)) Phi <- xmodel$Phi xmodel <- as.matrix(xmodel$loadings)} }} else { if(!is.matrix(xmodel) & !is.data.frame(xmodel) &!is.vector(xmodel)) { if(!is.null(xmodel$Phi)) Phi <- xmodel$Phi xmodel <- as.matrix(xmodel$loadings) } else {xmodel <- xmodel} } #first some basic setup parameters -- these just convert the various types of input if(!is.matrix(xmodel) ) {factors <- as.matrix(xmodel)} else {factors <- xmodel} num.var <- num.xvar <- dim(factors)[1] #how many x variables? if (is.null(num.xvar) ){num.xvar <- length(factors) num.xfactors <- 1} else { num.factors <- num.xfactors <- dim(factors)[2]} if(is.null(labels)) {vars <- xvars <- rownames(xmodel)} else { xvars <- vars <- labels} if(is.null(vars) ) {vars <- xvars <- paste("x",1:num.xvar,sep="") } fact <- colnames(xmodel) if (is.null(fact) ) { fact <- paste("X",1:num.xfactors,sep="") } if(is.numeric(factors)) {factors <- round(factors,digits) } } else {#fx is NULL This is for the case where we want to do some fancy graphics of sems num.xvar <- dim(Rx)[1] if(is.null(num.xvar)) num.xvar <- 0 num.xfactors <- 0 num.yfactors <- 0 num.factors <- 0 fact <- NULL if(is.null(labels)) {vars <- xvars <- rownames(Rx)} else { xvars <- vars <- labels} } num.yfactors <- 0 num.yvar <- 0 if (!is.null(ymodel)) { if(is.list(ymodel) & !is.data.frame(ymodel) ) {ymodel <- as.matrix(ymodel$loadings)} else {ymodel <- ymodel} if(!is.matrix(ymodel) ) {y.factors <- as.matrix(ymodel)} else {y.factors <- ymodel} num.y <- dim(y.factors)[1] if (is.null(num.y)) { num.y <- length(ymodel) num.yfactors <- 1} else { num.yfactors <- dim(y.factors)[2] } num.yvar <- num.y yvars <- rownames(ymodel) if(is.null(yvars)) {yvars <- paste("y",1:num.y,sep="") } if(is.null(labels)) {vars <- c(xvars,yvars)} else {yvars <- labels[(num.xvar+1):(num.xvar+num.y)]} yfact <- colnames(ymodel) if(is.null(yfact)) {yfact <- paste("Y",1:num.yfactors,sep="") } fact <- c(fact,yfact) if(is.numeric(y.factors)) {y.factors <- round(y.factors,digits) } }#end of if(null(y.model)) if(!is.null(Ry)& is.null(ymodel)) {num.yvar <- num.y <- dim(Ry)[1] yvars <- colnames(Ry)} #do we want to draw the inter Y correlations? num.var <- num.xvar + num.y if((num.xfactors > 0 ) & (num.yfactors > 0) & is.null(Phi)) {mimic <- TRUE num.factors <- max(num.xfactors,num.yfactors)} else {mimic <- FALSE num.factors <- num.xfactors + num.yfactors} sem <- matrix(rep(NA),6*(num.var*num.factors + num.factors),ncol=3) #this creates an output model for sem analysis lavaan <- vector("list",num.xfactors + num.yfactors) #create a list for lavaan colnames(sem) <- c("Path","Parameter","Value") var.rect <- list() fact.rect <- list() if(is.numeric(Phi) ) { Phi <- round(Phi,digits)} if(!is.null(Rx)) {x.curves <- 2 if(is.numeric(Rx) ) { Rx <- round(Rx,digits)}} else {x.curves <- 0 } if(!is.null(Ry)) {y.curves <- 3 if(is.numeric(Ry) ) { Ry <- round(Ry,digits)}} else {y.curves <- 0} ## now do the basic scaling of the figure ###create the basic figure ### length.labels <- 0 # a filler for now #plot.new() is necessary if we have not plotted before #strwd <- try(strwidth(xvars),silent=TRUE) strwd <- try(length.labels <- max(strwidth(xvars),strwidth("abc"))/1.8,silent=TRUE) #although this throws an error if the window is not already open, we don't show it #if (class(strwd) == "try-error" ) {plot.new() } if (inherits(strwd, "try-error" )) {length.labels = max(nchar(xvars),3)/1.8 } #length.labels <- max(strwidth(xvars),strwidth("abc"))/1.8 if(lr) {limx <- c(-(length.labels + x.curves+ errors/4),max(num.xvar,num.yvar)+2 + y.curves) limy <- c(0,max(num.xvar,num.yvar)+1) } else { limy <- c(-(length.labels +x.curves),max(num.xvar,num.yvar) +2 + y.curves+errors) limx <- c(0,max(num.xvar,num.yvar)+1) # if( errors) limy <- c(-1,max(num.xvar,num.yvar)+2) } scale.xaxis <- 3 #max(num.xvar +1,num.yvar+1)/(num.xfactors+1) if(lr) {plot(0,type="n",xlim=limx,ylim=limy,frame.plot=FALSE,axes=FALSE,ylab="",xlab="",main=main)} else {plot(0,type="n",xlim=limx,ylim=limy,frame.plot=FALSE,axes=FALSE,ylab="",xlab="",main=main) } #now draw the x part #we want to center the x factors on the left side #this requires adding an adjustment if there are more y variables than x variables. #do not draw any x variable if fx is not specified x.adj <- max(0,num.yvar - num.xvar)/2 k <- num.factors x.scale <- max(num.xvar +1,num.yvar+1)/(num.xvar+1) if(num.xvar > 0) { #the normal case for (v in 1:num.xvar) { if(lr) { var.rect[[v]] <- dia.rect(0,(num.xvar-v+1)*x.scale ,xvars[v],xlim=limx,ylim=limy,...) } else { var.rect[[v]] <- dia.rect(v*x.scale,0,xvars[v],xlim=limy,ylim=limx,...) } } nvar <- num.xvar if(mimic) { f.scale <- limy[2]/(num.xfactors+1) x.adj <- 0} else { f.scale <- max(num.xvar +1,num.yvar+1)/(num.xfactors+1)} if (num.xfactors >0) { for (f in 1:num.xfactors) { if(!regression) {if(lr) {fact.rect[[f]] <- dia.ellipse(limx[2]/scale.xaxis,(num.xfactors+1-f)*f.scale,fact[f],xlim=limx,ylim=limy,e.size=e.size,...)} else {fact.rect[[f]] <- dia.ellipse(f * f.scale ,limy[2]/scale.xaxis,fact[f],ylim=limy,xlim=limx,e.size=e.size,...) } } else {if(lr) {fact.rect[[f]] <- dia.rect(limx[2]/scale.xaxis,(num.xfactors+1-f)*f.scale,fact[f],xlim=c(0,nvar),ylim=c(0,nvar),...)} else { fact.rect[[f]] <- dia.rect(f*f.scale,limy[2]/scale.xaxis,fact[f],xlim=c(0,nvar),ylim=c(0,nvar),...)} } for (v in 1:num.xvar) { if(is.numeric(factors[v,f])) { if(simple && (abs(factors[v,f]) == max(abs(factors[v,])) ) && (abs(factors[v,f]) > cut) | (!simple && (abs(factors[v,f]) > cut))) { if (!regression & !mimic) {if(lr){dia.arrow(from=fact.rect[[f]],to=var.rect[[v]]$right,labels =factors[v,f],col=((sign(factors[v,f])<0) +1),lty=((sign(factors[v,f])<0) +1),adj=(v %% 2)) } else {dia.arrow(from=fact.rect[[f]],to=var.rect[[v]]$top,labels =factors[v,f],col=((sign(factors[v,f])<0) +1),lty=((sign(factors[v,f])<0) +1), adj = (v %% 2)) } } else {dia.arrow(to=fact.rect[[f]]$left,from=var.rect[[v]]$right,labels =factors[v,f], adj = (v %% 2), col=((sign(factors[v,f])<0) +1))} } } else { if (factors[v,f] !="0") { if (!regression & !mimic) { if(lr) {dia.arrow(from=fact.rect[[f]],to=var.rect[[v]]$right,labels =factors[v,f],adj = (v %% 2)) } else {dia.arrow(from=fact.rect[[f]],to=var.rect[[v]]$top,labels =factors[v,f],adj = (v %% 2))} } else {if(lr) {dia.arrow(to=fact.rect[[f]],from=var.rect[[v]]$right,labels =factors[v,f],adj = (v %% 2))} else {dia.arrow(to=fact.rect[[f]],from=var.rect[[v]]$top,labels =factors[v,f],adj = (v %% 2))} } } } } } if (num.xfactors ==1) { lavaan[[1]] <- paste(fact[1],"=~ ") for(i in 1:num.xvar) { sem[i,1] <- paste(fact[1],"->",vars[i],sep="") lavaan[[1]] <- paste0(lavaan[[1]], ' + ', vars[i]) if(is.numeric(factors[i])) {sem[i,2] <- vars[i]} else {sem[i,2] <- factors[i] } }} #end of if num.xfactors ==1 k <- num.xvar+1 k <- 1 for (f in 1:num.xfactors) { #if (!is.numeric(factors[i,f]) || (abs(factors[i,f]) > cut)) lavaan[[f]] <- paste0(fact[f] ," =~ ") for (i in 1:num.xvar) { if((!is.numeric(factors[i,f] ) && (factors[i,f] !="0"))|| ((is.numeric(factors[i,f]) && abs(factors[i,f]) > cut ))) { sem[k,1] <- paste(fact[f],"->",vars[i],sep="") lavaan[[f]] <- paste0(lavaan[[f]], ' + ', vars[i]) if(is.numeric(factors[i,f])) {sem[k,2] <- paste("F",f,vars[i],sep="")} else {sem[k,2] <- factors[i,f]} k <- k+1 } #end of if } } } #end of if num.xfactors > 0 if(errors & !mimic) { for (i in 1:num.xvar) {if(lr) { dia.self(var.rect[[i]],side=2) } else { dia.self(var.rect[[i]],side=1)} sem[k,1] <- paste(vars[i],"<->",vars[i],sep="") sem[k,2] <- paste("x",i,"e",sep="") k <- k+1 } } } else {nvar <- 0} #now, if there is a ymodel, do it for y model if(!is.null(ymodel)| !is.null(Ry)) { if(lr) { y.adj <- min(0,(num.yvar/2 - num.xvar/2)) f.yscale <- limy[2]/(num.yfactors+1) y.fadj <- 0} else { y.adj <- num.xvar/2 - num.yvar/2 f.yscale <- limx[2]/(num.yfactors+1) y.fadj <- 0} y.scale <- max(num.xvar +1,num.yvar+1)/(num.yvar+1) for (v in 1:num.yvar) { if(lr){ var.rect[[v+num.xvar]] <- dia.rect(limx[2]-y.curves-errors/2,limy[2]-v + y.adj,yvars[v],xlim=limx,ylim=limy,...)} else { var.rect[[v+num.xvar]] <- dia.rect(v * y.scale,limx[2],yvars[v],xlim=limy,ylim=limx,...)} } } #we have drawn the y variables, now should we draw the Y factors if(!is.null(ymodel)){ for (f in 1:num.yfactors) { if(!mimic) {if(lr) { fact.rect[[f+num.xfactors]] <- dia.ellipse(2*limx[2]/scale.xaxis,(num.yfactors+1-f)*f.yscale +y.fadj,yfact[f],xlim=limx,ylim=limy,e.size=e.size,...)} else { fact.rect[[f+num.xfactors]] <- dia.ellipse(f*f.yscale+ y.fadj,2*limx[2]/scale.xaxis,yfact[f],ylim=limy,xlim=limx,e.size=e.size,...)} } else {fact.rect[[f+num.xfactors]] <- fact.rect[[f]] } for (v in 1:num.yvar) {if(is.numeric(y.factors[v,f])) { {if(simple && (abs(y.factors[v,f]) == max(abs(y.factors[v,])) ) && (abs(y.factors[v,f]) > cut) | (!simple && (abs(y.factors[v,f]) > cut))) { if(lr) { dia.arrow(from=fact.rect[[f+num.xfactors]],to=var.rect[[v+num.xvar]]$left,labels =y.factors[v,f],col=((sign(y.factors[v,f])<0) +1),lty=((sign(y.factors[v,f])<0) +1),adj = (v %% 2))} else { dia.arrow(from=fact.rect[[f+num.xfactors]],to=var.rect[[v+num.xvar]]$bottom,labels =y.factors[v,f],col=((sign(y.factors[v,f])<0) +1),lty=((sign(y.factors[v,f])<0) +1),adj = (v %% 2) )} } } } else {if(factors[v,f] !="0") {if(lr) {dia.arrow(from=fact.rect[[f+num.xfactors]],to=var.rect[[v+num.xvar]]$left,labels =y.factors[v,f],adj = (v %% 2)) } else { dia.arrow(from=fact.rect[[f+num.xfactors]],to=var.rect[[v+num.xvar]]$bottom,labels =y.factors[v,f],adj = (v %% 2)) } } }} } if (num.yfactors ==1) { lavaan[[num.xfactors +1 ]] <- paste(fact[num.xfactors +1], "=~") for (i in 1:num.y) { sem[k,1] <- paste(fact[1+num.xfactors],"->",yvars[i],sep="") lavaan[[num.xfactors +1]] <- paste0(lavaan[[num.xfactors +1]], ' + ', yvars[i]) if(is.numeric(y.factors[i] ) ) {sem[k,2] <- paste("Fy",yvars[i],sep="")} else {sem[k,2] <- y.factors[i]} k <- k +1 } } else { #end of if num.yfactors ==1 for (f in 1:num.yfactors) { lavaan[[num.xfactors +f ]] <- paste(fact[num.xfactors +f], "=~") for (i in 1:num.y) { if( (y.factors[i,f] !="0") && (abs(y.factors[i,f]) > cut )) { lavaan[[num.xfactors +f]] <- paste0(lavaan[[num.xfactors +f]], ' + ', yvars[i]) sem[k,1] <- paste(fact[f+num.xfactors],"->",vars[i+num.xvar],sep="") if(is.numeric(y.factors[i,f])) { sem[k,2] <- paste("Fy",f,vars[i+num.xvar],sep="")} else {sem[k,2] <- y.factors[i,f]} k <- k+1 } #end of if } #end of factor } # end of variable loop } #end of else if # } if(errors) { for (i in 1:num.y) { if(lr) {dia.self(var.rect[[i+num.xvar]],side=4) } else {dia.self(var.rect[[i+num.xvar]],side=3)} sem[k,1] <- paste(vars[i+num.xvar],"<->",vars[i+num.xvar],sep="") sem[k,2] <- paste("y",i,"e",sep="") k <- k+1 }} } #end of if.null(ymodel) if(!is.null(Rx)) {#draw the correlations between the x variables for (i in 2:num.xvar) { for (j in 1:(i-1)) { if((!is.numeric(Rx[i,j] ) && ((Rx[i,j] !="0")||(Rx[j,i] !="0")))|| ((is.numeric(Rx[i,j]) && abs(Rx[i,j]) > cut ))) { if (lr) {if(abs(i-j) < 2) { dia.curve(from=var.rect[[j]]$left,to=var.rect[[i]]$left, labels = Rx[i,j],scale=-3*(i-j)/num.xvar)} else { dia.curve(from=var.rect[[j]]$left,to=var.rect[[i]]$left, labels = Rx[i,j],scale=-3*(i-j)/num.xvar)} } else { if(abs(i-j) < 2) { dia.curve(from=var.rect[[j]]$bottom,to=var.rect[[i]]$bottom, labels = Rx[i,j],scale=-3*(i-j)/num.xvar)} else {dia.curve(from=var.rect[[j]]$bottom,to=var.rect[[i]]$bottom, labels = Rx[i,j],scale=-3*(i-j)/num.xvar)} } }} } } if(!is.null(Ry)) {#draw the correlations between the y variables for (i in 2:num.yvar) { for (j in 1:(i-1)) { if((!is.numeric(Ry[i,j] ) && ((Ry[i,j] !="0")||(Ry[j,i] !="0")))|| ((is.numeric(Ry[i,j]) && abs(Ry[i,j]) > cut ))) { if (lr) {if(abs(i-j) < 2) { dia.curve(from=var.rect[[j+num.xvar]]$right,to=var.rect[[i+num.xvar]]$right, labels = Ry[i,j],scale=(i-j)/num.xvar)} else {dia.curve(from=var.rect[[j+num.xvar]]$right,to=var.rect[[i+num.xvar]]$right, labels = Ry[i,j],scale=3*(i-j)/num.xvar)} } else { if(abs(i-j) < 2) { dia.curve(from=var.rect[[j+num.xvar]]$bottom,to=var.rect[[i+num.xvar]]$bottom, labels = Ry[i,j],scale=(i-j)/num.xvar)} else {dia.curve(from=var.rect[[j+num.xvar]]$bottom,to=var.rect[[i+num.xvar]]$bottom, labels = Ry[i,j],scale=3*(i-j)/num.xvar)} } }} } } if(!regression) { if(!is.null(Phi)) {if (!is.matrix(Phi)) { if(!is.null(fy)) {Phi <- matrix(c(1,0,Phi,1),ncol=2)} else {Phi <- matrix(c(1,Phi,Phi,1),ncol=2)}} if(num.xfactors>1) {for (i in 2:num.xfactors) { #first do the correlations within the f set for (j in 1:(i-1)) { {if((!is.numeric(Phi[i,j] ) && ((Phi[i,j] !="0")||(Phi[j,i] !="0")))|| ((is.numeric(Phi[i,j]) && abs(Phi[i,j]) > cut ))) { if((Phi[i,j] == Phi[j,i] ) & (is.numeric(Phi[i,j]) && abs( Phi[i,j]) > 0)) { if(lr) {dia.curve(from=fact.rect[[i]]$right,to=fact.rect[[j]]$right, labels = Phi[i,j],scale=2*(i-j)/num.xfactors)} else { dia.curve(from=fact.rect[[i]]$top,to=fact.rect[[j]]$top, labels = Phi[i,j],scale=2*(i-j)/num.xfactors)} sem[k,1] <- paste(fact[i],"<->",fact[j],sep="") sem[k,2] <- paste("rF",i,"F",j,sep="") lavaan[[num.xfactors +num.yfactors +1]] <- paste(fact[i], "~~", fact[j])} else {#directed arrows if(Phi[i,j] !="0") { if(lr) { if(abs(i-j) < 2) {dia.arrow(from=fact.rect[[j]],to=fact.rect[[i]], labels = Phi[i,j],scale=2*(i-j)/num.xfactors)} else { dia.curved.arrow(from=fact.rect[[j]]$right,to=fact.rect[[i]]$right, labels = Phi[i,j],scale=2*(i-j)/num.xfactors)} } else { if(abs(i-j) < 2) { dia.arrow(from=fact.rect[[j]],to=fact.rect[[i]], labels = Phi[i,j],scale=2*(i-j)/num.xfactors)} else { dia.curved.arrow(from=fact.rect[[j]]$top,to=fact.rect[[i]]$top, labels = Phi[i,j],scale=2*(i-j)/num.xfactors)} } sem[k,1] <- paste(fact[j]," ->",fact[i],sep="") lavaan[[num.xfactors +num.yfactors +k]] <- paste(fact[j], "~", fact[i]) sem[k,2] <- paste("rF",j,"F",i,sep="")} else { if(lr) { if(abs(i-j) < 2) {dia.arrow(from=fact.rect[[i]],to=fact.rect[[j]], labels = Phi[j,i],scale=2*(i-j)/num.xfactors)} else { dia.curved.arrow(from=fact.rect[[i]]$right,to=fact.rect[[j]]$right, labels = Phi[j,i],scale=2*(i-j)/num.xfactors)} } else { if(abs(i-j) < 2) { dia.arrow(from=fact.rect[[i]],to=fact.rect[[j]], labels = Phi[j,i],scale=2*(i-j)/num.xfactors)} else { dia.curved.arrow(from=fact.rect[[i]]$top,to=fact.rect[[j]]$top, labels = Phi[j,i],scale=2*(i-j)/num.xfactors)} } sem[k,1] <- paste(fact[i],"<-",fact[j],sep="") lavaan[[num.xfactors +num.yfactors +k]] <- paste(fact[i], "~", fact[j]) sem[k,2] <- paste("rF",i,"F",j,sep="") } } } else { k <- k -1 #because we are skipping this one # lavaan[[num.xfactors +num.yfactors +k]] <- paste(fact[j], "~~", fact[i]) # sem[k,1] <- paste(fact[i],"<->",fact[j],sep="") # if (is.numeric(Phi[i,j])) {sem[k,2] <- paste("rF",i,"F",j,sep="")} else {sem[k,2] <- Phi[i,j] } } } k <- k + 1} } } } #end of correlations within the fx set if(!is.null(ymodel)) { for (i in 1:num.xfactors) { for (j in 1:num.yfactors) { if((!is.numeric(Phi[j+num.xfactors,i] ) && (Phi[j+num.xfactors,i] !="0"))|| ((is.numeric(Phi[j+num.xfactors,i]) && abs(Phi[j+num.xfactors,i]) > cut ))) { #We want to draw an arrrow, but if it is numeric, we need to figure out the sign col <- 1 if((is.numeric(Phi[j+num.xfactors,i]) & (Phi[j+num.xfactors,i] < 0))) col <- 2 dia.arrow(from=fact.rect[[i]],to=fact.rect[[j+num.xfactors]],Phi[j+num.xfactors,i], col=col, lty=col) #this draws X -> Y regressions sem[k,1] <- paste(fact[i],"->",fact[j+num.xfactors],sep="") lavaan[[num.xfactors +num.yfactors +k]] <- paste(fact[j+num.xfactors], "~", fact[i]) } else { k <- k-1 # sem[k,1] <- paste(fact[i],"<->",fact[j+num.xfactors],sep="") # lavaan[[num.xfactors +num.yfactors +k]] <- paste(fact[j+num.xfactors], "~~", fact[i])} # if (is.numeric(Phi[j+num.xfactors,i])) {sem[k,2] <- paste("rX",i,"Y",j,sep="")} else {sem[k,2] <- Phi[j+num.xfactors,i] } } k <- k + 1 } } #now look for Y -> Y regressions added 11/13/23 #does not write the lavaan code ! if(num.yfactors >1) { for(i in 1: num.yfactors){ for (j in 1:(i-1)) {#look for non zero Y - Y Phi elements if((!is.numeric(Phi[j+num.xfactors,i+num.xfactors] ) && (Phi[j+num.xfactors,i+num.xfactors] !="0"))|| ((is.numeric(Phi[j+num.xfactors,i+num.xfactors]) && abs(Phi[i+num.xfactors,j+num.xfactors]) > cut ))) { dia.arrow(from=fact.rect[[j+num.xfactors]],to=fact.rect[[i+num.xfactors]],Phi[i+num.xfactors,j+num.xfactors], col=col, lty=col) } } } } #end of y <- Y regressions } #end of null y.model } #end of Phi loop } #end of regression # } if(num.factors > 0 ) { for(f in 1:num.factors) { sem[k,1] <- paste(fact[f],"<->",fact[f],sep="") sem[k,3] <- "1" k <- k+1 } model=sem[1:(k-1),] } class(model) <- "mod" #suggested by John Fox to make the output cleaner lavaan <- unlist(lavaan) lavaan <- noquote(lavaan) result <- list(sem=model,lavaan=lavaan) return(invisible(result)) } (i-j)/num.xvar)} } }} } } if(!regression) { if(!is.null(Phi)) {if (!is.matrix(Phi)) { if(!is.null(fy)) {Phi <- matrix(c(1,0,Phi,1),ncol=2)} else {Phi <- matrix(c(1,Phi,Phi,1),ncol=2)}} if(num.xfactors>1) {for (i in 2:num.xfactors) { #first do the correlations within the f set for (j in 1:(i-1)) { {if((!is.numeric(Phi[i,j] ) && ((Phi[i,j] !="0")||(Phi[j,i] !="0")))|| ((is.numeric(Phi[i,j]) && abs(Phi[i,j]) > cut ))) { if((Phi[i,j] == Phi[j,i] ) & (is.numeric(Phi[i,j]) && abs( Phi[i,j]) > 0)) { if(lr) {dia.curve(from=fact.rect[[i]]$right,to=fact.rect[[j]]$right, labels = Phi[i,j],scale=2*(i-j)/num.xfactors)} else { dia.curve(from=fact.rect[[i]]$top,to=fact.rect[[j]]$top, labels = Phi[i,j],scale=2*(i-j)/num.xfactors)} sem[k,1] <- paste(fact[i],"<->",fact[j],sep="") sem[k,2] <- paste("rF",i,"F",j,sep="") lavaan[[num.xfactors +num.yfactors +1]] <- paste(fact[i], "~~", fact[j])} else {#directed arrows if(Phi[i,j] !="0") { if(lr) { if(abs(i-j) < 2) {dia.arrow(from=fact.rect[[j]],to=fact.rect[[i]], labels = Phi[i,j],scale=2*(i-j)/num.xfactors)} else { dia.curved.arrow(from=fact.rect[[j]]$right,to=fact.rect[[i]]$right, labels = Phi[i,j],scale=2*(i-j)/num.xfactors)} } else { if(abs(i-j) < 2) { dia.arrow(from=fact.rect[[j]],to=fact.rect[[i]], labels = Phi[i,j],scale=2*(i-j)/num.xfactors)} else { dia.curved.arrow(from=fact.rect[[j]]$top,to=fact.rect[[i]]$top, labels = Phi[i,j],scale=2*(i-j)/num.xfactors)} } sem[k,1] <- paste(fact[j]," ->",fact[i],sep="") lavaan[[num.xfactors +num.yfactors +k]] <- paste(fact[j], "~", fact[i]) sem[k,2] <- paste("rF",j,"F",i,sep="")} else { if(lr) { if(abs(i-j) < 2) {dia.arrow(from=fact.rect[[i]],to=fact.rect[[j]], labels = Phi[j,i],scale=2*(i-j)/num.xfactors)} else { dia.curved.arrow(from=fact.rect[[i]]$right,to=fact.rect[[j]]$right, labels = Phi[j,i],scale=2*(i-j)/num.xfactors)} } else { if(abs(i-j) < 2) { dia.arrow(from=fact.rect[[i]],to=fact.rect[[j]], labels = Phi[j,i],scale=2*(i-j)/num.xfactors)} else { dia.curved.arrow(from=fact.rect[[i]]$top,to=fact.rect[[j]]$top, labels = Phi[j,i],scale=2*(i-j)/num.xfactors)} } sem[k,1] <- paste(fact[i],"<-",fact[j],sep="") lavaan[[num.xfactors +num.yfactors +k]] <- paste(fact[i], "~", fact[j]) sem[k,2] <- paste("rF",i,"F",j,sep="") } } } else { k <- k -1 #because we are skipping this one # lavaan[[num.xfactors +num.yfactors +k]] <- paste(fact[j], "~~", fact[i]) # sem[k,1] <- paste(fact[i],"<->",fact[j],sep="") # if (is.numeric(Phi[i,j])) {sem[k,2] <- paste("rF",i,"F",j,sep="")} else {sem[k,2] <- Phi[i,j] } } } k <- k + 1} } } } #end of correlations within the fx set if(!is.null(ymodel)) { for (i in 1:num.xfactors) { for (j in 1:num.yfactors) { if((!is.numeric(Phi[j+num.xfactors,i] ) && (Phi[j+num.xfactors,i] !="0"))|| ((is.numeric(Phi[j+num.xfactors,i]) && abs(Phi[j+num.xfactors,i]) > cut ))) { #We want to draw an arrrow, but if it is numeric, we need to figure out the sign col <- 1 if((is.numeric(Phi[j+num.xfactors,i]) & (Phi[j+num.xfactors,i] < 0))) col <- 2 dia.arrow(from=fact.rect[[i]],to=fact.rect[[j+num.xfactors]],Phi[j+num.xfactors,i], col=col, lty=col) #this draws X -> Y regressions sem[k,1] <- paste(fact[i],"->",fact[j+num.xfactors],sep="") lavaan[[num.xfactors +num.yfactors +k]] <- paste(fact[j+num.xfactors], "~", fact[i]) } else { k <- k-1 # sem[k,1] <- paste(fact[i],"<->",fact[j+num.xfactors],sep="") # lavaan[[num.xfactors +num.yfactors +k]] <- paste(fact[j+num.xfactors], "~~", fact[i])} # if (is.numeric(Phi[j+num.xfactors,i])) {sem[k,2] <- paste("rX",i,"Y",j,sep="")} else {sem[k,2] <- Phi[j+num.xfactors,i] } } k <- k + 1 } } #now look for Y -> Y regressions added 11/13/23 #does not write the lavaan code ! if(num.yfactors >1) { for(i in 1: num.yfactors){ for (j in 1:(i-1)) {#look for non zero Y - Y Phi elements if((!is.numeric(Phi[j+num.xfactors,i+num.xfactors] ) && (Phi[j+num.xfactors,i+num.xfactors] !="0"))|| ((is.numeric(Phi[j+num.xfactors,i+num.xfactors]) && abs(Phi[i+num.xfactors,j+num.xfactors]) > cut ))) { dia.arrow(from=fact.rect[[j+num.xfactors]],to=fact.rect[[i+num.xfactors]],Phi[i+num.xfactors,j+num.xfactors], col=col, lty=col) } } } } #end of y <- Y regressions } #end of null y.model } #end of Phi loop } #end of regression # } if(num.factors > 0 ) { for(f in 1:num.factors) { sem[k,1] <- paste(fact[f],"<->",fact[f],sep="") sem[k,3] <- "1" k <- k+1 } model=sem[1:(k-1),] } class(model) <- "mod" #suggested by John Fox to make the output cleaner lavaan <- unlist(lavaan) lavaan <- noquote(lavaan) result <- list(sem=model,lavaan=lavaan) return(invisible(result)) } psych/R/reliability.R0000644000176200001440000001070714527502214014244 0ustar liggesusers #Developed 6/15/21 #Fixed 6/20/21 to avoid the problem of scales of one item or too many to find splits #Modified 7/10/21 to allow it to function with just a set of items, no keys specified #Modified 7/11/21 to include the unidimensional estimates from unidim and to use the R object for speed #Further modified 9/30/23 to force 1 factor solution length(keys < 4) #added the covar option 10/23/23 reliability <- function(keys=NULL,items,nfactors=2,split=TRUE,raw=TRUE,plot=FALSE,hist=FALSE, n.sample=10000,brute=FALSE,check.keys=TRUE,covar=FALSE) { cl <- match.call() result <- list() splits <- list() best <- worst <- list() res.name <- list() if(hist) raw <-TRUE if(raw) split <- TRUE NFACTORS <- nfactors #check to see if the first parameter is a list of keys, if not, then we want to do reliability on just one scale if(!is.null(keys)){ if(NCOL(keys)==1) { n.scales <- length(keys) } else { items <- keys #this is the case where the user did not specify keys but just the data message("keys not specified, all items will be scored") n.scales <- 1 keys <- list() keys[["All_items"]] <- colnames(items) }} else {n.scales <- 1 keys[["All_items"]] <- colnames(items) } if(isCorrelation(items)) {cors<- TRUE} else {cors<- FALSE} for (scales in 1:n.scales) { scale.key <- keys[[scales]] select <- selectFromKeys(scale.key) if(length(select)>1 ) { if (length(select)< 4) {nfactors <- 1} else {nfactors <- NFACTORS} #omegah with 2 factors is not defined for nvar<=5 if(cors) {om <- omegah(items[select,select], nfactors=nfactors,plot=plot,two.ok=TRUE) } else { om <- omegah(items[,select], nfactors=nfactors,plot=plot,two.ok=TRUE,covar=covar)} uni <- unidim(om$R) #use the R object rather than redoing the factoring if(split){temp.keys <- colnames(om$R) <- gsub("-","",colnames(om$R)) sign.key <- rep("",length(select)) sign.key[which(colnames(om$R)!= rownames(om$R))] <- "-" temp.keys <- paste0(sign.key,temp.keys) # split.half <- suppressWarnings(splitHalf(om$R,raw=raw,brute=FALSE,n.sample=n.sample, key=temp.keys)) split.half <- suppressWarnings(splitHalf(om$R,raw=raw,brute=brute,n.sample=n.sample,check.keys=check.keys,covar=covar)) #don't use temp.keys best[[scales]] <- list(max=split.half$maxAB) worst[[scales]] <- list(min=split.half$minAB) result[[scales]] <- list(omega_h = om$omega_h, alpha = split.half$alpha, omega.tot = om$omega.tot, u=uni$u[1],tau=uni$u[2],cong=uni$u[3],maxrb=split.half$maxrb,minrb=split.half$minrb, mean.r=split.half$av.r, med.r <- split.half$med.r, n.items=length(select) ) if(raw) splits[[scales]] <- split.half$raw} else { result[[scales]] <- list(omega_h = om$omega_h, alpha = om$alpha, omega.tot = om$omega.tot,u=uni$u[1],tau=uni$u[2],cong=uni$u[3],n.items=length(select)) } res.name[scales] <- names(keys)[scales] } } best <- unlist(unlist(best,recursive=FALSE),recursive=FALSE) #Creates a list that keeps the names worst <- unlist(unlist(worst,recursive=FALSE),recursive=FALSE) # names(result) <- res.name if(split) {ncol <- 11} else {ncol <- 7} result.df <- matrix(unlist(result[!is.null(result)]), ncol=ncol,byrow=TRUE) if(split) { colnames(result.df) <- c("omega_h", "alpha", "omega.tot", "Uni","tau","cong","max.split","min.split","mean.r", "med.r", "n.items") } else { colnames(result.df) <- c("omega.h", "alpha", "omega.tot","Uni","tau","cong","n.items")} rownames(result.df) <- unlist(res.name) if(raw) { lx <- unlist(lapply(splits,length)) splits <- splits[lx>0] names(splits) <- rownames(result.df) # splits.mat <- matrix(unlist(splits),ncol=length(keys)) # colnames(splits.mat) <- names(keys) class(result.df) <- c("psych","reliability", "matrix") names(best)<- paste(rep(rownames(result.df),each=2),names(best)) names(worst) <- paste(rep(rownames(result.df),each=2),names(worst)) result <- list(result.df = result.df,splits= splits,max=best,min=worst, Call = cl) if(hist) {multi.hist(splits)} class(result) <- c("psych","reliability") return(result) } else { class(result.df) <- c("psych","reliability", "matrix") return(result.df) } } #Created June 11-17, 2021 #fixed 6/20/21 to avoid the problem of null cases #fixed 1/3/23 to avoid problem with single items psych/R/circ.sim.R0000644000176200001440000000255410626070133013437 0ustar liggesusers"circ.sim" <- function (nvar = 72 ,nsub = 500, circum = TRUE, xloading =.6, yloading = .6, gloading=0, xbias=0, ybias = 0,categorical=FALSE, low=-3,high=3,truncate=FALSE,cutpoint=0) { avloading <- (xloading+yloading)/2 errorweight <- sqrt(1-(avloading^2 + gloading^2)) #squared errors and true score weights add to 1 g <- rnorm(nsub) truex <- rnorm(nsub)* xloading +xbias #generate normal true scores for x + xbias truey <- rnorm(nsub) * yloading + ybias #generate normal true scores for y + ybias if (circum) #make a vector of radians (the whole way around the circle) if circumplex {radia <- seq(0,2*pi,len=nvar+1) rad <- radia[which(radia<2*pi)] #get rid of the last one } else rad <- c(rep(0,nvar/4),rep(pi/2,nvar/4),rep(pi,nvar/4),rep(3*pi/2,nvar/4)) #simple structure #simple structure error<- matrix(rnorm(nsub*(nvar)),nsub) #create normal error scores #true score matrix for each item reflects structure in radians trueitem <- outer(truex, cos(rad)) + outer(truey,sin(rad)) item<- gloading * g + trueitem + errorweight*error #observed item = true score + error score if (categorical) { item = round(item) #round all items to nearest integer value item[(item<= low)] <- low item[(item>high) ] <- high } if (truncate) {item[item < cutpoint] <- 0 } return (item) } psych/R/headtail.R0000644000176200001440000000656614426061504013516 0ustar liggesusers"headtail" <- function(x, hlength=4,tlength=4,digits=2,ellipsis=TRUE,from=1,to=NULL) { .Deprecated("headTail", msg = "headtail is deprecated. Please use the headTail function") if(is.data.frame(x) | is.matrix(x) ) { if (is.matrix(x)) x <- data.frame(unclass(x)) nvar <- dim(x)[2] if(is.null(to)) to <- nvar dots <- rep("...",nvar) h <- data.frame(head(x[from:to],hlength)) t <- data.frame(tail(x[from:to],tlength)) for (i in 1:nvar) { if(is.numeric(h[1,i])) {h[i] <- round(h[i],digits) t[i] <- round(t[i],digits) } else {dots[i] <- NA} } if(ellipsis) { head.tail <- rbind(h,... = dots,t)} else {head.tail <- rbind(h,t) } } else {h <- head(x,hlength) t <- tail(x,tlength) if(ellipsis) { head.tail <- rbind(h,"... ...",t) } else { head.tail <- rbind(h,t) head.tail <- as.matrix(head.tail)}} return(head.tail)} #revised Feb 1, 2010 #revised August 10, 2011 to work with mixed numeric and non-numeric data #changing the name of headtail to be camelcase. "headTail" <- function(x, top=4,bottom=4,from=1,to=NULL,digits=2, hlength=4,tlength=4,ellipsis=TRUE) { if(is.data.frame(x) | is.matrix(x) ) { if (is.matrix(x)) x <- data.frame(unclass(x)) nvar <- dim(x)[2] hlength <- top tlength <- bottom if(is.null(to)) to <- nvar dots <- rep("...",nvar) h <- data.frame(head(x[from:to],hlength)) t <- data.frame(tail(x[from:to],tlength)) nvar <- NCOL(h) dots <- rep("...",nvar) for (i in 1:nvar) { if(is.numeric(h[1,i])) {h[i] <- round(h[i],digits) t[i] <- round(t[i],digits) } else {dots[i] <- NA} } if(ellipsis) { head.tail <- rbind(h,... = dots,t)} else {head.tail <- rbind(h,t) } } else {h <- head(x,hlength) t <- tail(x,tlength) if(ellipsis) { head.tail <- rbind(h,"... ...",t) } else { head.tail <- rbind(h,t) head.tail <- as.matrix(head.tail)}} return(head.tail)} #revised Feb 1, 2010 #revised August 10, 2011 to work with mixed numeric and non-numeric data topBottom <- function (x, top=4,bottom=4,from=1,to=NULL, digits=2, hlength = 4, tlength = 4) { if (is.data.frame(x) | is.matrix(x)) { if (is.matrix(x)) x <- data.frame(unclass(x)) nvar <- dim(x)[2] hlength <- top tlength <- bottom if(is.null(to)) to <- nvar ellipsis <- rep("...", nvar) h <- data.frame(head(x[from:to], hlength)) t <- data.frame(tail(x[from:to], tlength)) for (i in 1:nvar) { if (is.numeric(h[1, i])) { h[i] <- round(h[i], digits) t[i] <- round(t[i], digits) } else { ellipsis[i] <- NA } } head.tail <- rbind(h, t) head.tail <- as.matrix(head.tail) } else { h <- head(x, hlength) t <- tail(x, tlength) head.tail <-as.matrix( rbind(h, t)) } return(head.tail) } #added June, 2012 #added April 20, 2017 "quickView" <- function(x,top=8,bottom=8,from=1,to=NULL) { if(is.null(to)) to <- NCOL(x) if(NROW(x) < (top + bottom)) {bottom <- NROW(x) - top} if(NCOL(x) > 1) { View(x[c(1:top,(NROW(x)+1 - bottom):NROW(x)),from:to])} else { View(x[c(1:top,(NROW(x)+1 - bottom):NROW(x))]) } #the case of a vector }psych/R/matReg.r0000644000176200001440000001316214017211232013177 0ustar liggesusers #a helper function to find regressions from covariances #May 6, 2016 #Fixed November 29, 2018 to handle se of partialed variables correctly #modified September 25, 2019 to find intercepts and standard errors using momements #modified even more November 25, 2019 to return the intercepts and R2 matReg <- function(x,y=NULL,C=NULL,m=NULL,z=NULL,n.obs=0,means=NULL,std=FALSE,raw=TRUE,part=FALSE) { #first, allow for formula based calls cl <- match.call() #convert names to locations if given formula input prod <- ex <- NULL #in case we do not have formula input #first, see if they are in formula mode if(inherits(x,"formula")) { ps <- fparse(x) y <- ps$y #[c(y, x, z, ex)] x <- ps$x data <- C #to make the following easier to adapt from setCor med <- ps$m #but, mediation is not done here, so we just add this to x # if(!is.null(med)) x <- c(x,med) #not necessary, because we automatically put this in prod <- ps$prod z <- ps$z #do we have any variable to partial out ex <- ps$ex form <- TRUE } else {form <- FALSE} # data <- char2numeric(data) #move to later (01/05/19) if(is.numeric(y )) y <- colnames(data)[y] if(is.numeric(x )) x <- colnames(data)[x] if(is.numeric(z )) z <- colnames(data)[z] #check for bad input if(any( !(c(y,x,z,ex) %in% colnames(data)) )) { cat("\nOops! Variable names are incorrect. Offending items are ", c(y, x, z, ex)[which(!(c(y, x, z, ex) %in% colnames(data)))],"\n") stop("I am stopping because the variable names are incorrect. See above.")} #we might have had formula input thus, make C the covariance matrix # if(form) C <- cov(data[c(y,x,z,ex)],use="pairwise") if(is.null(n.obs)) n.obs <- 0 numx <- length(x) #this is the number of predictors (but we should adjust by the number of covariates) numz <- length(z) numy <- length(y) #df <- n.obs -1 - numx - length(z) - length(m) #but this does not take into account the mediating variables #note that the x variable includes the intercept and thus uses up one extra df df <- n.obs - numx -numz #We have partialed out z, should we use the df from it? This is changed 11/26/19 to reduce df for z Cr <- cov2cor(C) if(!is.null(z)){numz <- length(z) #partial out the z variables zm <- C[z,z,drop=FALSE] za <- C[x,z,drop=FALSE] zb <- C[y,z,drop=FALSE] zmi <- solve(zm) x.matrix <- C[x,x,drop=FALSE] - za %*% zmi %*% t(za) if(!part) y.matrix <- C[y,y,drop=FALSE] - zb %*% zmi %*% t(zb) #part versus partial (default) xy.matrix <- C[x,y,drop=FALSE] - za %*% zmi %*% t(zb) C <- cbind(rbind(y.matrix,xy.matrix),rbind(t(xy.matrix),x.matrix)) } if(numx==1) { beta <- solve(C[x,x,drop=FALSE],(C[x,y,drop=FALSE])) colnames(beta) <- y } else { beta <- solve(C[x,x],(C[x,y])) } #this is the same as setCor and is a x * x matrix if(!is.matrix(beta)) {beta <- matrix(beta,nrow=length(beta))} #beta is a matrix of beta weights if(is.character(x)) {rownames(beta) <- x} else {rownames(beta) <- colnames(C)[x]} if(is.character(y)) { colnames(beta) <- y} else { colnames(beta) <- colnames(C)[y]} x.inv <- solve(C[x,x]) #solve x.matrix #taken from setCor yhat <- t(C[x,y,drop=FALSE]) %*% x.inv %*% C[x,y,drop=FALSE] resid <- C[y,y]- yhat if(!std ) { df <- n.obs - numx - numz Residual.se <- sqrt(diag(resid /df)) #this is the df n.obs - length(x)) se <- MSE <- diag(resid )/(df) if(length(y) > 1) {SST <- diag(C[y,y] - means[y]^2 * n.obs)} else {SST <- ( C [y,y] - means[y]^2 * n.obs)} R2 <- (SST - diag(resid)) /SST se.beta <- list() for (i in 1:length(y)) { se.beta[[i]] <- sqrt(MSE[i] * diag(x.inv)) } se <- matrix(unlist(se.beta),ncol=numy) if(length(y) > 1) {SST <- diag(C [y,y] - means[y]^2 * n.obs)} else {SST <- ( C [y,y] - means[y]^2 * n.obs)} R2 <- (SST - diag(resid)) /SST } else { R2 <- colSums(beta * C[x,y])/diag(C[y,y,drop=FALSE]) #the standardized case uniq <- 1-(1-1/diag(solve(Cr[x,x,drop=FALSE]))) #1- smc if(n.obs > 2) { # se <- (sqrt((1-R2)/(n.obs-1 - numx-numz)) %*% t(sqrt(1/uniq))) #these are the standardized se se <- (sqrt((1-R2)/(df)) %*% t(sqrt(1/uniq))) #setCor uses df = n.obs - numx - 1 se <- t( se * sqrt(diag(C[y,y,drop=FALSE])) %*% t(sqrt(1/diag(C[x,x,drop=FALSE]))) ) #But does this work in the general case? colnames(se) <- colnames(beta) } else {se <- NA} if(raw) { #used to compare models -- we need to adjust this for dfs Residual.se <- sqrt((1-R2)* df/(df-1)) } else { #this is a kludge and is necessary to treat the SSR correctly Residual.se <- sqrt((1-R2)/df * (n.obs-1))} } if(!any(is.na(se))) { tvalue <- beta/se # prob <- 2*(1- pt(abs(tvalue),df)) prob <- -2 * expm1(pt(abs(tvalue),df,log.p=TRUE)) } else {tvalue <- prob <- df <- NA} result <- list(beta=beta,se=se, t=tvalue,df=df,prob=prob,R2=R2,SE.resid=Residual.se) return(result) } ####### psych/R/print.psych.R0000644000176200001440000013124714526763642014234 0ustar liggesusers#reorganized May 25, 2009 to call several print functions (psych.print.x where x = {fa, omega, stats, vss} #reorganized, January 18, 2009 to make some what clearer #added the switch capability, August 25, 2011 following suggestions by Joshua Wiley #changed from print.psych.x to print_psych.x because of change to R development not liking print.x.y format "print.psych" <- function(x,digits=2,all=FALSE,cut=NULL,sort=FALSE,short=TRUE,lower=TRUE,signif=NULL,...) { #probably need to fix this with inherits but trying to avoid doing that now. if(length(class(x)) > 1) { value <- class(x)[2] } else { #these next test for non-psych functions that may be printed using print.psych.fa if((!is.null(x$communality.iterations)) | (!is.null(x$uniquenesses)) | (!is.null(x$rotmat)) | (!is.null(x$Th)) ) {value <- fa } } if(all) value <- "all" if(value == "score.items") value <- "scores" if(value =="set.cor") value <- "setCor" switch(value, ## the following functions have their own print function esem = {print_psych.esem(x,digits=digits,short=short,cut=cut,...)}, extension = { print_psych.fa(x,digits=digits,all=all,cut=cut,sort=sort,...)}, extend = {print_psych.fa(x,digits=digits,all=all,cut=cut,sort=sort,...)}, fa = {print_psych.fa(x,digits=digits,all=all,cut=cut,sort=sort,...)}, fa.ci = { print_psych.fa.ci(x,digits=digits,all=all,... )}, iclust= { print_psych.iclust(x,digits=digits,all=all,cut=cut,sort=sort,...)}, omega = { print_psych.omega(x,digits=digits,all=all,cut=cut,sort=sort,...)}, omegaSem= {print_psych.omegaSem(x,digits=digits,all=all,cut=cut,sort=sort,...)}, principal ={print_psych.fa(x,digits=digits,all=all,cut=cut,sort=sort,...)}, schmid = { print_psych.schmid(x,digits=digits,all=all,cut=cut,sort=sort,...)}, stats = { print_psych.stats(x,digits=digits,all=all,cut=cut,sort=sort,...)}, vss= { print_psych.vss(x,digits=digits,all=all,cut=cut,sort=sort,...)}, cta = {print_psych.cta(x,digits=digits,all=all,...)}, mediate = {print_psych.mediate(x,digits=digits,short=short,...)}, multilevel = {print_psych.multilevel(x,digits=digits,short=short,...)}, testRetest = {print_psych.testRetest(x,digits=digits,short=short,...)}, bestScales = {print_psych.bestScales(x,digits=digits,short=short,...)}, ##Now, for the smaller print jobs, just do it here. all= {class(x) <- "list" print(x,digits=digits) }, #find out which function created the data and then print accordingly alpha = { cat("\nReliability analysis ",x$title," \n") cat("Call: ") print(x$call) cat("\n ") print(x$total,digits=digits) cat("\n 95% confidence boundaries \n") temp.df <- data.frame(lower=x$feldt$lower.ci, alpha= x$feldt$alpha,upper.ci =x$feldt$upper.ci) colnames(temp.df ) <- c("lower","alpha","upper") rownames(temp.df) <- "Feldt" if(!is.null(x$total$ase)){ temp.df[2,] <- c(x$total$raw_alpha - 1.96* x$total$ase, x$total$raw_alpha,x$total$raw_alpha +1.96* x$total$ase) #cat(round(c(x$total$raw_alpha - 1.96* x$total$ase, x$total$raw_alpha,x$total$raw_alpha +1.96* x$total$ase),digits=digits) ,"\n")} #if(!is.null(x$feldt)) {cat("\n 95% confidence boundaries (Feldt) \n") #temp.df <- data.frame(lower=x$feldt$lower.ci, alpha= x$feldt$alpha,upper.ci =x$feldt$upper.ci) rownames(temp.df)[2]<- "Duhachek" } if(!is.null(x$boot.ci)) { #{cat("\n lower median upper bootstrapped confidence intervals\n",round(x$boot.ci,digits=digits))} temp.df[3,] <- x$boot.ci rownames(temp.df)[3] <- "bootstrapped"} print(round(temp.df,digits)) cat("\n Reliability if an item is dropped:\n") print(x$alpha.drop,digits=digits) cat("\n Item statistics \n") print(x$item.stats,digits=digits) if(!is.null(x$response.freq)) { cat("\nNon missing response frequency for each item\n") print(round(x$response.freq,digits=digits))} }, alpha.ci = { cat("\n 95% confidence boundaries (Feldt)\n") if(is.na(x$r.bar)) {print("I am sorry, you need to specify the n.var to get confidence interals")} else { temp.df <- data.frame(lower=x$lower.ci, alpha= x$alpha,upper.ci =x$upper.ci) colnames(temp.df ) <- c("lower","alpha","upper") rownames(temp.df)<-"" print(round (temp.df,digits))} }, autoR = {cat("\nAutocorrelations \n") if(!is.null(x$Call)) {cat("Call: ") print(x$Call)} print(round(x$autoR,digits=digits)) }, bassAck = { cat("\nCall: ") print(x$Call) nf <- length(x$bass.ack)-1 for (f in 1:nf) { cat("\n",f, x$sumnames[[f]])} if(!short) { for (f in 1:nf) { cat("\nFactor correlations\n ") print(round(x$bass.ack[[f]],digits=digits))} } else {cat("\nUse print with the short = FALSE option to see the correlations, or use the summary command.")} }, auc = {cat('Decision Theory and Area under the Curve\n') cat('\nThe original data implied the following 2 x 2 table\n') print(x$probabilities,digits=digits) cat('\nConditional probabilities of \n') print(x$conditional,digits=digits) cat('\nAccuracy = ',round(x$Accuracy,digits=digits),' Sensitivity = ',round(x$Sensitivity,digits=digits), ' Specificity = ',round(x$Specificity,digits=digits), '\nwith Area Under the Curve = ', round(x$AUC,digits=digits) ) cat('\nd.prime = ',round(x$d.prime,digits=digits), ' Criterion = ',round(x$criterion,digits=digits), ' Beta = ', round(x$beta,digits=digits)) cat('\nObserved Phi correlation = ',round(x$phi,digits=digits), '\nInferred latent (tetrachoric) correlation = ',round(x$tetrachoric,digits=digits)) }, bestScales = {if(!is.null(x$first.result)) { cat("\nCall = ") print(x$Call) # print(x$first.result) # print(round(x$means,2)) print(x$summary,digits=digits) # x$replicated.items items <- x$items size <- NCOL(items[[1]]) nvar <- length(items) for(i in 1:nvar) { if(NCOL(items[[i]]) > 3) {items[[i]] <- items[[i]][,-1]} # items[[i]][,2:3] <- round(items[[i]][,2:3],digits) if(length( items[[i]][1]) > 0 ) { items[[i]][,c("mean.r","sd.r")] <- round(items[[i]][,c("mean.r","sd.r")],digits) }} cat("\n Best items on each scale with counts of replications\n") print(items)} else { df <- data.frame(correlation=x$r,n.items = x$n.items) cat("The items most correlated with the criteria yield r's of \n") print(round(df,digits=digits)) if(length(x$value) > 0) {cat("\nThe best items, their correlations and content are \n") print(x$value) } else {cat("\nThe best items and their correlations are \n") for(i in 1:length(x$short.key)) {print(round(x$short.key[[i]],digits=digits))} } } }, bifactor = { cat("Call: ") print(x$Call) cat("Alpha: ",round(x$alpha,digits),"\n") cat("G.6: ",round(x$G6,digits),"\n") cat("Omega Hierarchical: " ,round(x$omega_h,digits),"\n") # cat("Omega H asymptotic: " ,round(x$omega.lim,digits),"\n") cat("Omega Total " ,round(x$omega.tot,digits),"\n") print(x$f,digits=digits,sort=sort) }, circ = {cat("Tests of circumplex structure \n") cat("Call:") print(x$Call) res <- data.frame(x[1:4]) print(res,digits=2) }, circadian = {if(!is.null(x$Call)) {cat("Call: ") print(x$Call)} cat("\nCircadian Statistics :\n") if(!is.null(x$F)) { cat("\nCircadian F test comparing groups :\n") print(round(x$F,digits)) if(short) cat("\n To see the pooled and group statistics, print with the short=FALSE option") } if(!is.null(x$pooled) && !short) { cat("\nThe pooled circadian statistics :\n") print( x$pooled)} if(!is.null(x$bygroup) && !short) {cat("\nThe circadian statistics by group:\n") print(x$bygroup)} #if(!is.null(x$result)) print(round(x$result,digits)) if(!is.null(x$phase.rel)) { cat("\nSplit half reliabilities are split half correlations adjusted for test length\n") x.df <- data.frame(phase=x$phase.rel,fits=x$fit.rel) print(round(x.df,digits)) } if(is.data.frame(x)) {class(x) <- "data.frame" print(round(x,digits=digits)) } }, cluster.cor = { cat("Call: ") print(x$Call) cat("\n(Standardized) Alpha:\n") print(x$alpha,digits) cat("\n(Standardized) G6*:\n") print(x$G6,digits) cat("\nAverage item correlation:\n") print(x$av.r,digits) cat("\nNumber of items:\n") print(x$size) cat("\nSignal to Noise ratio based upon average r and n \n") print(x$sn,digits=digits) # cat("\nScale intercorrelations:\n") # print(x$cor,digits=digits) cat("\nScale intercorrelations corrected for attenuation \n raw correlations below the diagonal, alpha on the diagonal \n corrected correlations above the diagonal:\n") print(x$corrected,digits) }, cluster.loadings = { cat("Call: ") print(x$Call) cat("\n(Standardized) Alpha:\n") print(x$alpha,digits) cat("\n(Standardized) G6*:\n") print(x$G6,digits) cat("\nAverage item correlation:\n") print(x$av.r,digits) cat("\nNumber of items:\n") print(x$size) cat("\nScale intercorrelations corrected for attenuation \n raw correlations below the diagonal, alpha on the diagonal \n corrected correlations above the diagonal:\n") print(x$corrected,digits) cat("\nItem by scale intercorrelations\n corrected for item overlap and scale reliability\n") print(x$loadings,digits) #cat("\nItem by scale Pattern matrix\n") # print(x$pattern,digits) }, cohen.d = {cat("Call: ") print(x$Call) cat("Cohen d statistic of difference between two means\n") if(NCOL(x$cohen.d) == 3) {print(round(x$cohen.d,digits=digits))} else {print( data.frame(round(x$cohen.d[1:3],digits=digits),x$cohen.d[4:NCOL(x$cohen.d)]))} cat("\nMultivariate (Mahalanobis) distance between groups\n") print(x$M.dist,digits=digits) cat("r equivalent of difference between two means\n") print(round(x$r,digits=digits)) }, cohen.d.by = {cat("Call: ") print(x$Call) ncases <- length(x) for (i in (1:ncases)) {cat("\n Group levels = ",names(x[i]),"\n") cat("Cohen d statistic of difference between two means\n") print(x[[i]]$cohen.d,digits=digits) cat("\nMultivariate (Mahalanobis) distance between groups\n") print(x[[i]]$M.dist,digits=digits) cat("r equivalent of difference between two means\n") print(x[[i]]$r,digits=digits) } cat("\nUse summary for more compact output") }, cohen.profile = {cat("Cohen Profile coefficients \n") print(round(unclass(x),digits=digits))}, congruence = {cat("Congruence coefficients \n") print(round(unclass(x),digits=digits))}, comorbid = {cat("Call: ") print(x$Call) cat("Comorbidity table \n") print(x$twobytwo,digits=digits) cat("\nimplies phi = ",round(x$phi,digits), " with Yule = ", round(x$Yule,digits), " and tetrachoric correlation of ", round(x$tetra$rho,digits)) cat("\nand normal thresholds of ",round(-x$tetra$tau,digits)) }, corCi = {#cat("Call:") # print(x$Call) cat("\n Correlations and normal theory confidence intervals \n") print(round(x$r.ci,digits=digits)) }, cor.ci = {cat("Call:") print(x$Call) cat("\n Coefficients and bootstrapped confidence intervals \n") lowerMat(x$rho) phis <- x$rho[lower.tri(x$rho)] cci <- data.frame(lower.emp =x$ci$low.e, lower.norm=x$ci$lower,estimate =phis ,upper.norm= x$ci$upper, upper.emp=x$ci$up.e,p = x$ci$p) rownames(cci) <- rownames(x$ci) cat("\n scale correlations and bootstrapped confidence intervals \n") print(round(cci,digits=digits)) }, cor.cip = {class(x) <- NULL cat("\n High and low confidence intervals \n") print(round(x,digits=digits)) }, corr.test = {cat("Call:") print(x$Call) cat("Correlation matrix \n") print(round(x$r,digits)) cat("Sample Size \n") print(x$n) if(x$sym) {cat("Probability values (Entries above the diagonal are adjusted for multiple tests.) \n")} else { if (x$adjust != "none" ) {cat("These are the unadjusted probability values.\n The probability values adjusted for multiple tests are in the p.adj object. \n")}} print(round(x$p,digits)) if(short) cat("\n To see confidence intervals of the correlations, print with the short=FALSE option\n") if(!short) {cat("\n Confidence intervals based upon normal theory. To get bootstrapped values, try cor.ci\n") if(is.null(x$ci.adj)) { ci.df <- data.frame(raw=x$ci) } else { ci.df <- data.frame(raw=x$ci,lower.adj = x$ci.adj$lower.adj,upper.adj=x$ci.adj$upper.adj)} print(round(ci.df,digits)) } }, corr.p = {cat("Call:") print(x$Call) cat("Correlation matrix \n") print(round(x$r,digits)) cat("Sample Size \n") print(x$n) if(x$sym) {cat("Probability values (Entries above the diagonal are adjusted for multiple tests.) \n")} else { if (x$adjust != "none" ) {cat("These are the unadjusted probability values. \n To see the values adjusted for multiple tests see the p.adj object. \n")}} print(round(x$p,digits)) if(short) cat("\n To see confidence intervals of the correlations, print with the short=FALSE option\n") if(!short) {cat("\n Confidence intervals based upon normal theory. To get bootstrapped values, try cor.ci\n") print(round(x$ci,digits)) } }, cortest= {cat("Tests of correlation matrices \n") cat("Call:") print(x$Call) cat(" Chi Square value" ,round(x$chi,digits)," with df = ",x$df, " with probability <", signif(x$p,digits),"\n" ) if(!is.null(x$z)) cat("z of differences = ",round(x$z,digits),"\n") }, cor.wt = {cat("Weighted Correlations \n") cat("Call:") print(x$Call) lowerMat(x$r,digits=digits) }, crossV = {if(is.null(x$mean.fit)) { cat("Cross Validation\n") cat("Call:") print(x$Call) cat("\nValidities from raw items or from the correlation matrix\n") cat("Number of unique predictors used = ",x$nvars,"\n") print(x$crossV,digits=digits) if(!is.null(x$item.R)) {cat("\nCorrelations based upon item based regressions \n") lowerMat(x$item.R)} cat("\nCorrelations based upon correlation matrix based regressions\n") lowerMat(x$mat.R) } else {cat("Bootstrapped Cross Validation\n") cat("Call:") print(x$Call) print(round(x$mean.fit,digits=digits)) cat("\n With average coefficents of \n") print(round(x$mean.coeff,digits=digits)) } }, describe= {if(!is.null(x$signif)) { if( missing(signif) ) signif <-x$signif x$signif <- NULL } if (length(dim(x))==1) {class(x) <- "list" attr(x,"call") <- NULL if(!missing(signif)) x <- signifNum(x,digits=signif) print(round(x,digits=digits)) } else {class(x) <- "data.frame" if(!missing(signif)) x <- signifNum(x,digits=signif) print(round(x,digits=digits)) } }, describeBy = {cat("\n Descriptive statistics by group \n") if(!is.null(x$Call)){ cat("Call: " ) print(x$Call) } class(x) <- "by" print(x,digits=digits) }, describeData = {if (length(dim(x))==1) {class(x) <- "list" attr(x,"call") <- NULL print(round(x,digits=digits)) } else { cat('n.obs = ', x$n.obs, "of which ", x$complete.cases," are complete cases. Number of variables = ",x$nvar," of which all are numeric ",x$all.numeric," \n") print(x$variables) } }, describeFast = { cat("\n Number of observations = " , x$n.obs, "of which ", x$complete.cases," are complete cases. Number of variables = ",x$nvar," of which ",x$numeric," are numeric and ",x$factors," are factors \n") if(!short) {print(x$result.df) } else {cat("\n To list the items and their counts, print with short = FALSE") } }, direct = { cat("Call: ") print(x$Call) cat("\nDirect Schmid Leiman = \n") print(x$direct,cut=cut) } , faBy = { cat("Call: ") print(x$Call) cat("\n Factor analysis by Groups\n") cat("\nAverage standardized loadings (pattern matrix) based upon correlation matrix for all cases as well as each group\n") cat("\nlow and high ", x$quant,"% quantiles\n") print(x$faby.sum,digits) if(!short) { cat("\n Pooled loadings across groups \n") print(x$mean.loading,digits=digits) cat("\n Average factor intercorrelations for all cases and each group\n") print(x$mean.Phi,digits=2) cat("\nStandardized loadings (pattern matrix) based upon correlation matrix for all cases as well as each group\n") print(x$loadings,digits=digits) cat("\n With factor intercorrelations for all cases and for each group\n") print(x$Phi,digits=2) if(!is.null(x$fa)) { cat("\nFactor analysis results for each group\n") print(x$fa,digits) } else {print("For a more informative output, print with short=FALSE")}} }, faCor = { cat("Call: ") print(x$Call) if(!short) { cat("\n Factor Summary for first solution\n") summary(x$f1) cat("\n Factor Summary for second solution\n") summary(x$f2) } cat("\n Factor correlations between the two solutions\n") print(x$r,digits=digits) cat("\n Factor congruence between the two solutions\n") print(x$congruence,digits=digits) }, fa.reg ={cat("Call: ") print(x$Call) if(!short) { cat("\n Factor analysis based regression \n") print (x$regression)} else { cat("\n Factor analysis based regression \n") summary(x$regression) cat("\n set short = FALSE to see the detailed statistics \n" ) } }, guttman = { cat("Call: ") print(x$Call) cat("\nAlternative estimates of reliability\n") # cat("Beta = ", round(x$beta,digits), " This is an estimate of the worst split half reliability") cat("\nGuttman bounds \nL1 = ",round(x$lambda.1,digits), "\nL2 = ", round(x$lambda.2,digits), "\nL3 (alpha) = ", round(x$lambda.3,digits),"\nL4 (max) = " ,round(x$lambda.4,digits), "\nL5 = ", round(x$lambda.5,digits), "\nL6 (smc) = " ,round(x$lambda.6,digits), "\n") cat("TenBerge bounds \nmu0 = ",round(x$tenberge$mu0,digits), "mu1 = ", round(x$tenberge$mu1,digits), "mu2 = " ,round(x$tenberge$mu2,digits), "mu3 = ",round(x$tenberge$mu3,digits) , "\n") cat("\nalpha of first PC = ",round( x$alpha.pc,digits), "\nestimated greatest lower bound based upon communalities= ", round(x$glb,digits),"\n") cat("\nbeta found by splitHalf = ", round(x$beta,digits),"\n") } , ICC = {cat("Call: ") print(x$Call) cat("\nIntraclass correlation coefficients \n") print(x$results,digits=digits) cat("\n Number of subjects =", x$n.obs, " Number of Judges = ",x$n.judge) cat("\nSee the help file for a discussion of the other 4 McGraw and Wong estimates,") }, glb = { cat("Call: ") print(x$Call) cat("\nEstimates of the Greatest Lower Bound for reliability, based on factor and cluster models") cat("\nGLB estimated from factor based communalities = ", round(x$glb.fa$glb,digits) ,"with ",x$nf, " factors.") cat("\nUse glb.fa to see more details \n") cat("\n Various estimates based upon splitting the scale into two (see keys for the various splits)") cat("\nBeta = ", round(x$beta,digits) , "\nBeta fa",round(x$beta.fa,digits), " This is an estimate of the worst split half reliability") cat("\nKmeans clusters for best split ",round(x$glb.Km,digits=2)) cat("\nCluster based estimates \nglb.IC = ",round(x$glb.IC,digits)) cat("\nglb.max ", round(x$glb.max,digits),"Is the maximum of these estimates") cat("\n alpha-PC = ", round(x$alpha.pc,digits),"An estimate of alpha based on eignvalues") #\nglb.IC) = " ,round(x$glb.IC,digits)) #"\nL5 = ", round(x$lambda.5,digits), "\nL6 (smc) = " ,round(x$lambda.6,digits), "\n") cat("\nTenBerge bounds \nmu0 = ",round(x$tenberge$mu0,digits), "mu1 = ", round(x$tenberge$mu1,digits), "mu2 = " ,round(x$tenberge$mu2,digits), "mu3 = ",round(x$tenberge$mu3,digits) , "\n") cat("\nestimated greatest lower bound based upon splitHalf = ", round(x$glb.Fa,digits),"\n") if(!short) {cat("\n Various ways of keying the results\n") x$keys} else {cat("\nUse short = FALSE to see the various ways of splitting the scale")} }, iclust.sort = { nvar <- ncol(x$sort) x$sort[4:nvar] <- round(x$sort[4:nvar],digits) print(x$sort) }, irt.fa = { cat("Item Response Analysis using Factor Analysis \n") cat("\nCall: ") print(x$Call) if (!is.null(x$plot)) print(x$plot) if(!short) { nf <- length(x$irt$difficulty) for(i in 1:nf) {temp <- data.frame(discrimination=x$irt$discrimination[,i],location=x$irt$difficulty[[i]]) cat("\nItem discrimination and location for factor ",colnames(x$irt$discrimination)[i],"\n") print(round(temp,digits))} cat("\n These parameters were based on the following factor analysis\n") print(x$fa) } else {summary(x$fa)} }, irt.poly = { cat("Item Response Analysis using Factor Analysis \n") cat("\nCall: ") print(x$Call) if (!is.null(x$plot)) print(x$plot) #this calls the polyinfo print function below if(!short) { nf <- length(x$irt$difficulty) for(i in 1:nf) {temp <- data.frame(discrimination=x$irt$discrimination[,i],location=x$irt$difficulty[[i]]) cat("\nItem discrimination and location for factor ",colnames(x$irt$discrimination)[i],"\n") print(round(temp,digits))} cat("\n These parameters were based on the following factor analysis\n") print(x$fa) } else {summary(x$fa) } }, kappa = {if(is.null(x$cohen.kappa)) { cat("Call: ") print(x$Call) cat("\nCohen Kappa and Weighted Kappa correlation coefficients and confidence boundaries \n") print(x$confid,digits=digits) cat("\n Number of subjects =", x$n.obs,"\n")} else { cat("\nCohen Kappa (below the diagonal) and Weighted Kappa (above the diagonal) \nFor confidence intervals and detail print with all=TRUE\n") print(x$cohen.kappa,digits=digits) if(!is.null(x$av.kappa)) cat("\nAverage Cohen kappa for all raters ", round(x$av.kappa,digits=digits)) if(!is.null(x$av.wt)) cat("\nAverage weighted kappa for all raters ",round(x$av.wt,digits=digits)) } }, mardia = { cat("Call: ") print(x$Call) cat("\nMardia tests of multivariate skew and kurtosis\n") cat("Use describe(x) the to get univariate tests") cat("\nn.obs =",x$n.obs," num.vars = ",x$n.var,"\n") cat("b1p = ",round(x$b1p,digits)," skew = ",round(x$skew,digits ), " with probability <= ", signif(x$p.skew,digits)) cat("\n small sample skew = ",round(x$small.skew,digits ), " with probability <= ", signif(x$p.small,digits)) cat("\nb2p = ", round(x$b2p,digits)," kurtosis = ",round(x$kurtosis,digits)," with probability <= ",signif(x$p.kurt,digits )) }, mchoice = { cat("Call: ") print(x$Call) cat("\n(Unstandardized) Alpha:\n") print(x$alpha,digits=digits) cat("\nAverage item correlation:\n") print(x$av.r,digits=digits) if(!is.null(x$item.stats)) { cat("\nitem statistics \n") print(round(x$item.stats,digits=digits))} }, mixed= { cat("Call: ") print(x$Call) if(is.null(x$rho)) {if(lower) {lowerMat(x,digits=digits)} else {print(x,digits)} } else { if(lower) {if(length(x$rho)>1) { lowerMat (x$rho,digits=digits)} else {print(x$rho,digits)}} }}, omegaDirect ={ cat("Call: ") print(x$Call) cat("\nOmega from direct Schmid Leiman = ", round(x$omega.g,digits=digits),"\n") print_psych.fa(x) eigenvalues <- diag(t(x$loadings) %*% x$loadings) cat("\nWith eigenvalues of:\n") print(eigenvalues,digits=2) cat("The degrees of freedom for the model is",x$orth.f$dof," and the fit was ",round(x$orth.f$objective,digits),"\n") if(!is.na(x$orth.f$n.obs)) {cat("The number of observations was ",x$orth.f$n.obs, " with Chi Square = ",round(x$orth.f$STATISTIC,digits), " with prob < ", round(x$orth.f$PVAL,digits),"\n")} if(!is.null(x$orth.f$rms)) {cat("\nThe root mean square of the residuals is ", round(x$orth.f$rms,digits),"\n") } if(!is.null(x$orth.f$crms)) {cat("The df corrected root mean square of the residuals is ", round(x$orth.f$crms,digits),"\n") } if(!is.null(x$orth.f$RMSEA)) {cat("\nRMSEA and the ",x$orth.f$RMSEA[4] ,"confidence intervals are ",round(x$orth.f$RMSEA[1:3],digits+1)) } if(!is.null(x$orth.f$BIC)) {cat("\nBIC = ",round(x$orth.f$BIC,digits))} cat("\n Total, General and Subset omega for each subset\n") colnames(x$om.group) <- c("Omega total for total scores and subscales","Omega general for total scores and subscales ", "Omega group for total scores and subscales") #rownames(x$om.group) <- tn print(round(t(x$om.group),digits))}, paired.r = {cat("Call: ") print(x$Call) print(x$test) if(is.null(x$z)) {cat("t =",round(x$t,digits)) } else {cat("z =",round(x$z,digits)) } cat(" With probability = ",round(x$p,digits)) }, pairwise = {cat("Call: ") print(x$Call) cat("\nMean correlations within/between scales\n") lowerMat(x$av.r) cat("\nPercentage of complete correlations\n") lowerMat(x$percent) cat("\nNumber of complete correlations per scale\n") lowerMat(x$count) if(!is.null(x$size)) {cat("\nAverage number of pairwise observations per scale\n") lowerMat(round(x$size))} cat("\n Imputed correlations (if found) are in the imputed object") }, pairwiseCounts = {cat("Call: ") print(x$Call) cat("\nOverall descriptive statistics\n") if(!is.null(x$description)) print(x$description) cat("\nNumber of item pairs <=", x$cut," = ", dim(x$df)[1]) cat("\nItem numbers with pairs <= ",x$cut, " (row wise)", length(x$rows)) cat("\nItem numbers with pairs <= ",x$cut,"(col wise)", length(x$cols)) cat("\nFor names of the offending items, print with short=FALSE") if(!short) {cat("\n Items names with pairs < ", x$cut," (row wise)\n", names(x$rows)) cat("\n Items names with pairs <=",x$cut," (col wise)\n", names(x$cols))} cat("\nFor even more details examine the rows, cols and df report" ) }, parallel= { cat("Call: ") print(x$Call) if(!is.null(x$fa.values) & !is.null(x$pc.values) ) { parallel.df <- data.frame(fa=x$fa.values,fa.sam =x$fa.simr,fa.sim=x$fa.sim,pc= x$pc.values,pc.sam =x$pc.simr,pc.sim=x$pc.sim) fa.test <- x$nfact pc.test <- x$ncomp cat("Parallel analysis suggests that ") cat("the number of factors = ",fa.test, " and the number of components = ",pc.test,"\n") cat("\n Eigen Values of \n") colnames(parallel.df) <- c("Original factors","Resampled data", "Simulated data","Original components", "Resampled components", "Simulated components") if(any(is.na(x$fa.sim))) parallel.df <- parallel.df[-c(3,6)] } if(is.na(fa.test) ) fa.test <- 0 if(is.na(pc.test)) pc.test <- 0 if(!any(is.na(parallel.df))) {print(round(parallel.df[1:max(fa.test,pc.test),],digits))} else { if(!is.null(x$fa.values)) {cat("\n eigen values of factors\n") print(round(x$fa.values,digits))} if(!is.null(x$fa.sim)){cat("\n eigen values of simulated factors\n") print(round(x$fa.sim,digits))} if(!is.null(x$pc.values)){cat("\n eigen values of components \n") print(round(x$pc.values,digits))} if(!is.null(x$pc.sim)) {cat("\n eigen values of simulated components\n") print(round(x$pc.sim,digits=digits))} } }, partial.r = {cat("partial correlations \n") print(round(unclass(x),digits)) }, phi.demo = {print(x$tetrachoric) cat("\nPearson (phi) below the diagonal, phi2tetras above the diagonal\n") print(round(x$phis,digits)) cat("\nYule correlations") print(x$Yule) }, poly= {cat("Call: ") print(x$Call) cat("Polychoric correlations \n") if(!is.null(x$twobytwo)) { print(x$twobytwo,digits=digits) cat("\n implies tetrachoric correlation of ",round(-x$rho,digits))} else { if(!isSymmetric(x$rho)) lower<- FALSE if(lower) {lowerMat (x$rho,digits) } else {print(x$rho,digits)} cat("\n with tau of \n") print(x$tau,digits) if(!is.null(x$tauy)) print(x$tauy,digits) } }, polydi= {cat("Call: ") print(x$Call) cat("Correlations of polytomous with dichotomous\n") print(x$rho,digits) cat("\n with tau of \n") print(x$tau,digits) }, polyinfo = {cat("Item Response Analysis using Factor Analysis \n") cat("\n Summary information by factor and item") names(x$sumInfo ) <- paste("Factor",1:length(x$sumInfo)) for (f in 1:length(x$sumInfo)) { cat("\n Factor = ",f,"\n") temp <- x$sumInfo[[f]] temps <- rowSums(temp) if(sort) {ord <- order(temps,decreasing=TRUE) temp <- temp[ord,] temps <- temps[ord]} temp <- temp[temps > 0,] summary <- matrix(c(colSums(temp),sqrt(1/colSums(temp)),1-1/colSums(temp)),nrow=3,byrow=TRUE) rownames(summary) <-c("Test Info","SEM", "Reliability") temp <- rbind(temp,summary) if(ncol(temp) == 61) {print(round(temp[,seq(1,61,10)],digits=digits)) } else {print(round(temp,digits=digits))} #this gives us info at each unit } if(!short) { cat("\n Average information (area under the curve) \n") AUC <-x$AUC max.info <-x$max.info if(dim(AUC)[2]==1) {item <- 1:length(AUC) } else {item <- 1:dim(AUC)[1]} if(sort) { #first sort them into clusters #first find the maximum for each row and assign it to that cluster cluster <- apply(AUC,1,which.max) ord <- sort(cluster,index.return=TRUE) AUC <- AUC[ord$ix,,drop=FALSE] max.info <- max.info[ord$ix,,drop=FALSE] #now sort column wise #now sort the AUC that have their highest AUC on each cluster items <- table(cluster) #how many items are in each cluster? first <- 1 for (i in 1:length(items)) {# i is the factor number if(items[i] > 0 ) { last <- first + items[i]- 1 ord <- sort(abs(AUC[first:last,i]),decreasing=TRUE,index.return=TRUE) AUC[first:last,] <- AUC[item[ord$ix+first-1],] max.info[first:last,] <- max.info[item[ord$ix+first-1],] rownames(AUC)[first:last] <- rownames(max.info)[first:last] <- rownames(AUC)[ord$ix+first-1] first <- first + items[i] } } } #end of sort print(AUC,digits=digits) cat("\nMaximum value is at \n") print(max.info,digits=digits) } }, validity = { cat("Call: ") print(x$Call) cat("\nPredicted Asymptotic Scale Validity:\n") print(x$asymptotic,digits) cat("\n For predicted scale validities, average item validities, or scale reliabilities, print the separate objects") }, overlap = { cat("Call: ") print(x$Call) cat("\n(Standardized) Alpha:\n") print(x$alpha,digits) cat("\n(Standardized) G6*:\n") print(x$G6,digits) cat("\nAverage item correlation:\n") print(x$av.r,digits) cat("\nMedian item correlation:\n") print(x$med.r,digits) cat("\nNumber of items:\n") print(x$size) cat("\nSignal to Noise ratio based upon average r and n \n") print(x$sn,digits=digits) cat("\nScale intercorrelations corrected for item overlap and attenuation \n adjusted for overlap correlations below the diagonal, alpha on the diagonal \n corrected correlations above the diagonal:\n") print(x$corrected,digits) if(!is.null(x$quality) ) {cat("\n Percentage of keyed items with highest absolute correlation with scale (scale quality)\n") print(x$quality,digits=2) } if(!is.null(x$MIMS) ) {cat("\n Average adjusted correlations within and between scales (MIMS)\n") lowerMat(x$MIMS) } if(!is.null(x$MIMT) ) {cat("\n Average adjusted item x scale correlations within and between scales (MIMT)\n") lowerMat(x$MIMT) } if(short) {cat("\n In order to see the item by scale loadings and frequency counts of the data\n print with the short option = FALSE") } else { if(!is.null(x$item.cor) ) { cat("\nItem by scale correlations:\n corrected for item overlap and scale reliability\n" ) print(round(x$item.cor,digits=digits)) } } }, frequency = { cat("Response frequencies (of non-missing items) \n") print(unclass(x),digits=digits) }, r.test = {cat("Correlation tests \n") cat("Call:") print(x$Call) cat( x$Test,"\n") if(!is.null(x$t)) {cat(" t value" ,round(x$t,digits)," with probability <", signif(x$p,digits) )} if(!is.null(x$z)) {cat(" z value" ,round(x$z,digits)," with probability ", round(x$p,digits) )} if(!is.null(x$ci)) {cat("\n and confidence interval ",round(x$ci,digits) ) } }, reliability ={cat("Measures of reliability \n") if(is.list(x)) { print(x$Call) x <- x$result.df} print(round(unclass(x),digits)) }, residuals = { if(NCOL(x) == NROW(x)) { if (lower) {lowerMat (x,digits=digits)}} else {print(round(unclass(x),digits))} #tweaked 1/30/18 }, rmsea ={cat("RMSEA: Root Mean Square Error of Approximation\n") #"RMSEA=",round(x[[1]],digits=digits), "with lower bound = " ,round(x[[3]],digits=digits), "upper bound = ",round(x[[2]],digits=digits)) if(!is.null(x) ){cat("\nRMSEA and the ",1-x[[4]] ,"confidence intervals are ",round(x[[1]],digits+1),round(x[[2]],digits+1),round(x[[3]],digits+1)) } }, scree = { cat("Scree of eigen values \nCall: ") print(x$Call) if(!is.null(x$fv)) {cat("Eigen values of factors ") print(round(x$fv,digits))} if (!is.null(x$pcv)) {cat("Eigen values of Principal Components") print(round(x$pcv,digits))} }, scores = { cat("Call: ") print(x$Call) if(x$raw) { cat("\n(Unstandardized) Alpha:\n") } else {cat("\n(Standardized) Alpha:\n") } print(x$alpha,digits=digits) if(!is.null(x$ase)) {cat("\nStandard errors of unstandardized Alpha:\n") rownames(x$ase) <- "ASE " print(x$ase,digit=digits) } if(!is.null(x$alpha.ob)) {cat("\nStandardized Alpha of observed scales:\n") print(x$alpha.ob,digits=digits)} cat("\nAverage item correlation:\n") print(x$av.r,digits=digits) cat("\nMedian item correlation:\n") print(x$med.r,digits=digits) cat("\n Guttman 6* reliability: \n") print(x$G6,digits=digits) cat("\nSignal/Noise based upon av.r : \n") print(x$sn,digits=digits) #if(iclust) {cat("\nOriginal Beta:\n") # print(x$beta,digits) } cat("\nScale intercorrelations corrected for attenuation \n raw correlations below the diagonal, alpha on the diagonal \n corrected correlations above the diagonal:\n") if(!is.null(x$alpha.ob)) {cat("\nNote that these are the correlations of the complete scales based on the correlation matrix,\n not the observed scales based on the raw items.\n")} print(x$corrected,digits) if(!is.null(x$MIMS) ) {cat("\n Average adjusted correlations within and between scales (MIMS)\n") lowerMat(x$MIMS) } if(!is.null(x$MIMT) ) {cat("\n Average adjusted item x scale correlations within and between scales (MIMT)\n") lowerMat(x$MIMT) } if(short) {cat("\n In order to see the item by scale loadings and frequency counts of the data\n print with the short option = FALSE") } else { if(!is.null(x$item.cor) ) { cat("\nItem by scale correlations:\n corrected for item overlap and scale reliability\n" ) print(round(x$item.corrected,digits=digits)) } if(!is.null(x$response.freq)) { cat("\nNon missing response frequency for each item\n") print(round(x$response.freq,digits=digits))} } }, scoreBy = { cat("Call: ") print(x$Call) ngroup <- length(x$cor) for(i in 1:ngroup) { # cat("\n group = ",names(x$cor[i]),"\n") if(!is.na(x$cor[[i]])) { cat("\n Correlations for group",names(x$cor[i]),"\n") lowerMat(x$cor[[i]]$cor) cat("\n alpha \n") print(round(x$alpha[[i]]$alpha,digits)) }} cat("\n To see the correlations as a matrix, examine the cor.mat object ") }, lmCor= { cat("Call: ") print(x$Call) if(x$raw) {cat("\nMultiple Regression from raw data \n")} else { cat("\nMultiple Regression from matrix input \n")} if(!is.null(x$z)) cat("The following variables were partialed out:", x$z, "\n and are included in the calculation of df1 and df2\n") ny <- NCOL(x$coefficients) for(i in 1:ny) {cat("\n DV = ",colnames(x$coefficients)[i], "\n") # if(!is.na(x$intercept[i])) {cat(' intercept = ',round(x$intercept[i],digits=digits),"\n")} if(!is.null(x$se)) {result.df <- data.frame( round(x$coefficients[,i],digits),round(x$se[,i],digits),round(x$t[,i],digits),signif(x$Probability[,i],digits),round(x$ci[,i],digits), round(x$ci[,(i +ny)],digits),round(x$VIF,digits),round(x$Vaxy[,i],digits)) colnames(result.df) <- c("slope","se", "t", "p","lower.ci","upper.ci", "VIF","Vy.x") print(result.df) cat("\nResidual Standard Error = ",round(x$SE.resid[i],digits), " with ",x$df[2], " degrees of freedom\n") result.df <- data.frame(R = round(x$R[i],digits), R2 = round(x$R2[i],digits), Ruw = round(x$ruw[i],digits),R2uw = round( x$ruw[i]^2,digits), round(x$shrunkenR2[i],digits),round(x$seR2[i],digits), round(x$F[i],digits),x$df[1],x$df[2], signif(x$probF[i],digits+1)) colnames(result.df) <- c("R","R2", "Ruw", "R2uw","Shrunken R2", "SE of R2", "overall F","df1","df2","p") cat("\n Multiple Regression\n") print(result.df) } else { result.df <- data.frame( round(x$coefficients[,i],digits),round(x$VIF,digits),round(x$Vaxy[,i],digits)) colnames(result.df) <- c("slope", "VIF","Vy.x") print(result.df) result.df <- data.frame(R = round(x$R[i],digits), R2 = round(x$R2[i],digits), Ruw = round(x$ruw[i],digits),R2uw = round( x$ruw[i]^2,digits)) colnames(result.df) <- c("R","R2", "Ruw", "R2uw") cat("\n Multiple Regression\n") print(result.df) } } if(!is.null(x$cancor)) { cat("\nVarious estimates of between set correlations\n") cat("Squared Canonical Correlations \n") print(x$cancor2,digits=digits) if(!is.null(x$Chisq)) {cat("Chisq of canonical correlations \n") print(x$Chisq,digits=digits)} cat("\n Average squared canonical correlation = ",round(x$T,digits=digits)) cat("\n Cohen's Set Correlation R2 = ",round(x$Rset,digits=digits)) #print(x$Rset,digits=digits) if(!is.null(x$Rset.shrunk)){ cat("\n Shrunken Set Correlation R2 = ",round(x$Rset.shrunk,digits=digits)) cat("\n F and df of Cohen's Set Correlation ",round(c(x$Rset.F,x$Rsetu,x$Rsetv), digits=digits))} cat("\nUnweighted correlation between the two sets = ",round(x$Ruw,digits)) } }, sim = { if(is.matrix(x)) {x <-unclass(x) round(x,digits) } else { cat("Call: ") print(x$Call) cat("\n $model (Population correlation matrix) \n") print(x$model,digits) if(!is.null(x$reliability)) { cat("\n$reliability (population reliability) \n") print(x$reliability,digits) } if(!is.null(x$N) && !is.null(x$r)) { cat("\n$r (Sample correlation matrix for sample size = ",x$N,")\n") print(x$r,digits)} } }, smoother = {x <- unclass(x) print(x) }, split ={ cat("Split half reliabilities ") cat("\nCall: ") print(x$Call) cat("\nMaximum split half reliability (lambda 4) = ",round(x$maxrb,digits=digits)) cat("\nGuttman lambda 6 = ",round(x$lambda6,digits=digits)) cat("\nAverage split half reliability = ",round(x$meanr,digits=digits)) cat("\nGuttman lambda 3 (alpha) = ",round(x$alpha,digits=digits)) cat("\nGuttman lambda 2 = ", round(x$lambda2,digits=digits)) cat("\nMinimum split half reliability (beta) = ",round(x$minrb,digits=digits)) if(x$covar) { cat("\nAverage interitem covariance = ",round(x$av.r,digits=digits)," with median = ", round(x$med.r,digits=digits))} else { cat("\nAverage interitem r = ",round(x$av.r,digits=digits)," with median = ", round(x$med.r,digits=digits))} if(!is.na(x$ci[1])) {cat("\n ",names(x$ci)) cat("\n Quantiles of split half reliability = ",round(x$ci,digits=digits))} }, statsBy ={ cat("Statistics within and between groups ") cat("\nCall: ") print(x$Call) cat("Intraclass Correlation 1 (Percentage of variance due to groups) \n") print(round(x$ICC1,digits)) cat("Intraclass Correlation 2 (Reliability of group differences) \n") print(round(x$ICC2,digits)) cat("eta^2 between groups \n") print(round(x$etabg^2,digits)) if(short) { cat("\nTo see the correlations between and within groups, use the short=FALSE option in your print statement.")} if(!short) {cat("Correlation between groups \n") lowerMat(x$rbg) cat("Correlation within groups \n") lowerMat(x$rwg) } cat("\nMany results are not shown directly. To see specific objects select from the following list:\n",names(x)) }, tau = {cat("Tau values from dichotomous or polytomous data \n") class(x) <- NULL print(x,digits) }, tetra = {cat("Call: ") print(x$Call) cat("tetrachoric correlation \n") if(!is.null(x$twobytwo)) { print(x$twobytwo,digits=digits) cat("\n implies tetrachoric correlation of ",round(x$rho,digits))} else {if(length(x$rho)>1) { if(!isSymmetric(x$rho)) lower <- FALSE} else {lower<- FALSE} if(is.matrix(x$rho) && lower) {lowerMat (x$rho,digits)} else { print(x$rho,digits)} cat("\n with tau of \n") print(x$tau,digits) if(!is.null(x$tauy)) print(x$tauy,digits) } }, thurstone = { cat("Thurstonian scale (case 5) scale values ") cat("\nCall: ") print(x$Call) print(x$scale) cat("\n Goodness of fit of model ", round(x$GF,digits)) }, KMO = {cat("Kaiser-Meyer-Olkin factor adequacy") cat("\nCall: ") print(x$Call) cat("Overall MSA = ",round(x$MSA,digits)) cat("\nMSA for each item = \n") print(round(x$MSAi,digits)) }, unidim= { cat("\nA measure of unidimensionality \n Call: ") print(x$Call) cat("\nUnidimensionality index = \n" ) print(round(x$uni,digits=digits)) cat("\nunidim adjusted index reverses negatively scored items.") cat("\nalpha "," Based upon reverse scoring some items.") cat ("\naverage and median correlations are based upon reversed scored items") }, yule = {cat("Yule and Generalized Yule coefficients") cat("\nCall: ") print(x$Call) cat("\nYule coefficient \n") print(round(x$rho,digits)) cat("\nUpper and Lower Confidence Intervals = \n") print(round(x$ci,digits)) }, Yule = {cat("Yule and Generalized Yule coefficients") cat("\nLower CI Yule coefficient Upper CI \n") print(round(c(x$lower,x$rho,x$upper),digits)) } ) #end of switch } #end function psych/R/spider.R0000644000176200001440000001337714547537555013251 0ustar liggesusers#radar and spider developed July 10, 2011 #added the ability to rotate the entire figure 02/24/20 and to use keys #Added label.pos to give better control of the figures "radar" <- function(x,labels=NULL,keys=NULL,center=FALSE,connect=FALSE,scale=1,ncolors=31,fill=FALSE,add=FALSE,linetyp="solid", main="Radar Plot",angle=0,absolute=FALSE,show=TRUE,digits=2,cut=.2,circles=TRUE,shape=FALSE, clockwise=FALSE,delta = NULL,label.pos=NULL, position=NULL ,xlim=c(-1,1),ylim=c(-1,1), ...) { #hard coded because something is wrong with call nvar <- length(x) lty="solid" if(!is.null(keys) ) {key.ord <- selectFromKeys(keys) num.keys <- length(keys) for (i in 1:num.keys) { select <- sub("-", "", unlist(keys[i])) keys[[i]]<- select} key.ord <- unlist(keys) } else {key.ord <-1:nvar num.keys <- 1 if (clockwise) key.ord <- nvar:1 } if(is.null(labels)) {labels <- names(keys) show.keys<- TRUE} else {labels <- labels[key.ord] show.keys <- FALSE} nvar <- length(key.ord) x <- x[key.ord] #this order the x vector according to the keys vector if(is.null(labels)) labels <- paste("V",1:nvar,sep="") if(is.null(delta)) {DELTA <- rep(1.1,nvar)} else {if(length(delta)==1) {DELTA =rep(delta,nvar) } else {DELTA=delta}} if(is.null(label.pos)) {label.pos <- rep(1.05,nvar) } else {if(length(label.pos)==1) label.pos= rep(label.pos,nvar)} if(is.null(position)) position <- rep(0,nvar) if (clockwise) { position <- position[key.ord] label.pos <- label.pos[key.ord] DELTA <- DELTA[key.ord] } SEGMENTS <- max(153,nvar) #increased from 48 to improve resolution if(ncolors < 2) {colors <- FALSE} else {colors <- TRUE} angles <- (0:SEGMENTS) * 2 * pi/SEGMENTS unit.circle <- cbind(cos(angles), sin(angles)) if(!add) {if(shape) {nx <- SEGMENTS/nvar Lx <- seq(1,SEGMENTS+1,nx)#%% SEGMENTS + 1 plot(unit.circle[Lx,1],unit.circle[Lx,2],axes=FALSE,xlab="",ylab="",typ="l",xlim=xlim,ylim=ylim) } else {plot(unit.circle,typ="l",asp=1,axes=FALSE,xlab="",ylab="",main=main,xlim=xlim,ylim=ylim)} if(circles) {lines(unit.circle*.25,typ="l",lty="dotted",col="red") lines(unit.circle*.5,typ="l",lty="dotted") lines(unit.circle*.75,typ="l",lty="dotted",col="blue")} } if(colors) { gr <- colorRampPalette(c("red","white","blue")) #added June 20 colramp <- gr(ncolors) } else { colramp <- grey((ncolors:0)/ncolors)} for(c in 1:nvar) { nx <- (((c-1 + angle*num.keys) %% nvar) +1 )* SEGMENTS/nvar +1 nx <- nx %% SEGMENTS # +1 if(center) {x0 <- unit.circle[nx,1] * .5 y0 <- unit.circle[nx,2] * .5 } else { x0 <- 0 y0 <- 0} if(!absolute) {scaler <- (x[c]*scale/2 + .5) } else {scaler <- abs(x[c]*scale )}#stats can go from -1 to 1, scale from 0 to 1 x1 <- unit.circle[nx,1] y1 <- unit.circle[nx,2] Lx <- c(x0,x1)*scaler Ly <- c(y0,y1) *scaler if(c==1) { Oldx <- unit.circle[(nvar)*SEGMENTS/nvar + 1,1]*(x[nvar]*scale/2 +.5) #had been nvar-1 until 01/09/24 Oldy <- unit.circle[(nvar)*SEGMENTS/nvar + 1,2]*(x[nvar]*scale/2+.5)} if(colors) { if(absolute) { {if (x[c] < 0 ) {col="red" lty="dotted"} else {col="blue" lty="solid"} }} else {if (scaler < .5) {col="red"} else {col="blue"} } lines(Lx,Ly,col=col,lty=lty,...) } else { lines(Lx,Ly,...)} if(show) { txt <- paste0(sprintf("%.2f",round(x[c],2))) if (abs(x[c]) > cut) {text(Lx[2]*DELTA[c],Ly[2]*DELTA[c],txt,...)} # if (abs(x[c]) > cut) {text(Lx[2]*sign(Lx[2])*DELTA[c],Ly[2]*sign(Ly[2])*DELTA[c],txt)} } if(connect) {lines(c(Oldx,x1*scaler),c(Oldy,y1*scaler),lty=linetyp)} if(fill) {polygon(c(0,Oldx , x1*scaler,0),c(0,Oldy,y1*scaler,0),col=colramp[ceiling(scaler*ncolors)],...)} Oldx <- x1*scaler Oldy <- y1* scaler if(!show.keys) { if(position[c] ==0) {pos=NULL} else { pos=position[c]} #it is not at all clear what I am trying to do here text(x1*label.pos[c],y1*label.pos[c],labels[c],pos=pos,...)} if(show.keys) {for(cc in 1:length(labels)) { nx <- (((cc-1 + angle) %% num.keys) +1 )* SEGMENTS/num.keys +1 x1 <- unit.circle[nx,1] y1 <- unit.circle[nx,2] text(x1*label.pos[nx] ,y1*label.pos[nx],labels[cc],...) } } } #end of 1:nvar loop } "spider" <- function(y,x,data,labels=NULL,rescale = FALSE,center=FALSE,connect=TRUE,overlay=FALSE,scale=1,ncolors=31,fill=FALSE,main=NULL,...) { if(inherits(y,"formula")) { #taken from lm -- allows formula input if desired ps <- fparse(y) y <- ps$y x <- ps$x med <- ps$m #but, mediation is not done here, so we just add this to x # if(!is.null(med)) x <- c(x,med) #not necessary, because we automatically put this in prod <- ps$prod z <- ps$z #do we have any variable to partial out ex <- ps$ex } if(is.null(labels)) labels <- colnames(data)[x] if(rescale) { data <- scale(data)/3 } #rescales to -1 to 1 if(length(y)==1) { if(!is.null(main)) {main=main} else {main <- colnames(data)[y]} radar(data[y,x],labels=labels,center=center,connect=connect,scale=scale,ncolors=ncolors,fill=fill,main=main,...) } else { nvar <- length(y) for (i in 1:nvar) { if(!is.null(main)) {title=main[y[i]]} else {title <- colnames(data)[y[i]]} if(overlay) { if (i==1) {radar(data[y[i],x],labels=labels,center=center,connect=connect,scale=scale,ncolors=ncolors,fill=fill,main=title,...) } else { radar(data[y[i],x],labels=labels,center=center,connect=connect,scale=scale,ncolors=ncolors,fill=fill,add=TRUE,linetyp=nvar %%6 + 2,main=title,...) } } else { radar(data[y[i],x],labels=labels,center=center,connect=connect,scale=scale,ncolors=ncolors,fill=fill,main=title,...) }} } } psych/R/circ.simulation.R0000644000176200001440000000227312253362016015033 0ustar liggesusers"circ.simulation" <- function(samplesize=c(100,200,400,800), numberofvariables=c(16,32,48,72)) { ncases=length(samplesize) nvar <- length(numberofvariables) results <- matrix(NaN,ncol=ncases,nrow=nvar*ncases) results.ls <- list() case <- 1 for (ss in 1:ncases) { for (nv in 1:nvar) { circ.data <- circ.sim(nvar=numberofvariables[nv],nsub=samplesize[ss]) sim.data <- circ.sim(nvar=numberofvariables[nv],nsub=samplesize[ss],circum=FALSE) elipse.data <- circ.sim(nvar=numberofvariables[nv],nsub=samplesize[ss],yloading=.4) r.circ<- cor(circ.data) r.sim <- cor(sim.data) r.elipse <- cor(elipse.data) pc.circ <- principal(r.circ,2) pc.sim <- principal(r.sim,2) pc.elipse <- principal(r.elipse,2) case <- case + 1 results.ls[[case]] <- list(numberofvariables[nv],samplesize[ss],circ.tests(pc.circ),circ.tests(pc.elipse),circ.tests(pc.sim)) } } results.mat <- matrix(unlist(results.ls),ncol=14,byrow=TRUE) colnames(results.mat) <- c("nvar","n","c-gap","c-fisher","c-RT","c-VT","e-gap","e-fisher","e-RT","e-VT","s-gap","s-fisher","s-RT","s-VT") results.df <- data.frame(results.mat) return(results.df) }psych/R/multi.hist.R0000644000176200001440000001107014101257744014030 0ustar liggesusers#Added mar and changes main Sept 23, 2018 #slightly revised June 12, 2021 "multi.hist" <- function(x,nrow=NULL,ncol=NULL,density=TRUE,freq=FALSE,bcol="white",dcol=c("black","black"),dlty=c("dashed","dotted"),main=NULL,mar=c(2,1,1,1), breaks=21,global=TRUE,...) { #if((!is.matrix(x)) & (!is.data.frame(x))) {nvar <- 1 # x <- as.matrix(x,ncol=1) } else { if(is.list(x) & !is.data.frame(x)) {nvar <- length(x) xlabs <- main <-names(x) xx <- unlist(x) global.min <- min(xx,na.rm=TRUE) global.max <- max(xx,na.rm=TRUE)} else { x <- as.data.frame(x) xlabs <-colnames(x) nvar <- NCOL(x) #number of variables global.min <- min(x,na.rm=TRUE) global.max <- max(x,na.rm=TRUE) } if(global) xlim=c(global.min-.05,global.max+.05) #this makes all the plots on the same scale old.par <- par(no.readonly = TRUE) if((is.null(main)) & nvar==1) main <- "Histogram, Density, and Normal Fit" if (length(dcol)<2) dcol <- c(dcol,dcol) if(is.null(main)) {main <- c(colnames(x)) } else {main <- rep(main,nvar)} nsize=ceiling(sqrt(nvar)) #size of graphic if(is.null(nrow) ) {nrow <- nsize} else {ncol <- nvar/nrow} if(is.null(ncol)) {ncol <- ceiling(nvar/nsize )} else {nrow <- nvar/ncol} par(mfrow=c(nrow,ncol)) #set new graphic parameters par(mar=mar) for (i in 1:nvar) { if(is.data.frame(x) ) {xi <-x[,i]} else {xi <-x[[i]] } if(!global) {xlim = c(min(xi, na.rm=TRUE),max(xi,na.rm=TRUE))} if(density) {histo.density(xi,xlab=xlabs[i],main=main[i],freq=freq,bcol,dcol=dcol,dlty=dlty,breaks=breaks,xlim=xlim,...)} else { hist(xi,main=main[i],xlab=xlabs[i],freq=freq,bcol,dcol=dcol,dlty=dlty,breaks=breaks,xlim=xlim,...)} } #draw the histograms for each variable on.exit(par(old.par)) #set the graphic parameters back to the original } # } "histo.density" <- function(x,main="Histogram, Density, and Normal Fit",freq=FALSE,xlab=NULL,bcol="white",dcol=c("black","black"),dlty=c("dashed","dotted"),breaks=21,...) { h <- hist(x,plot=FALSE,breaks=breaks) m1 <- mean(x,na.rm=TRUE) s1 <- sd(x,na.rm=TRUE) d <- density(x,na.rm=TRUE) if(freq) {ymax <- max(h$count)} else {ymax <- max(h$density)} dmax <- max(d$y) ymax <- max(ymax,dmax) plot(h,freq=freq,ylim=c(0,ymax*1.2),main=main,xlab=xlab,col=bcol,ylab=NULL,...) if(!freq) {lines(d,lty=dlty[1],col=dcol[1],...) curve(dnorm(x,m1,s1),add=TRUE,lty=dlty[2],col=dcol[2],...)} else { lines(d$x,lty=dlty[1],col=dcol[1],...)} } "histBy" <- function(x,var,group,data=NULL,density=TRUE,alpha=.5,breaks=21,col,xlab,main="Histograms by group",freq=FALSE,...) { formula <- FALSE if(inherits(x, "formula")) { ps <- fparse(x) formula <- TRUE if(is.null(data)) stop("You must specify the data if you are using formula input") x <- data group <- ps$x var <- ps$y } old.par <- par(no.readonly = TRUE) if(missing(xlab)) xlab = var if(missing(group)) { if(missing(col)) col12 <- col2rgb("blue",TRUE)/255 col <- rgb(col12[1],col12[2],col12[3],alpha) hist(x[,var],xlab=xlab,main=main,breaks=breaks,freq=freq,col=col,...) # hist(x[,var],xlab=xlab,main=main,breaks=breaks,col=col,...) d <- density(x[,var],na.rm=TRUE) if(density) lines(d) } else { #the normal case gr <- x[group] grp<- table(gr) if(missing(col)) col <- rainbow(length(grp)) col12 <- col2rgb(col,TRUE)/255 col <- rgb(col12[1,],col12[2,],col12[3,],alpha) xlim=range(x[var],na.rm=TRUE) test <- hist(x[,var],breaks=breaks,plot=FALSE) breaks <- test$breaks grp <- names(grp) d <- density(x[(gr==grp[1]),var],na.rm=TRUE) hist(x[(gr==grp[1]),var],xlim=xlim,col=col[1],breaks=breaks,freq=freq,xlab=xlab,main=main,...) if(density) lines(d) for(i in (2:length(grp))) { # hist(x[(gr==grp[i]),var],xlim=xlim,col=col[i],freq=FALSE,breaks=breaks,add=TRUE,...) hist(x[(gr==grp[i]),var],xlim=xlim,col=col[i],breaks=breaks,add=TRUE,freq=freq,...) d <- density(x[(gr==grp[i]),var],na.rm=TRUE) if(density) lines(d) }} on.exit(par(old.par)) } #moved to (and expanded) as plot.reliabiity # plot.reliability <- function(x,ncol=NULL,nrow=NULL,...) { # global.min <- min(x,na.rm=TRUE) # global.max <- max(x,na.rm=TRUE) # nvar <-nrow(x) # # old.par <- par(no.readonly = TRUE) # nsize=ceiling(sqrt(nvar)) # if(is.null(nrow) ) {nrow <- nsize} else {ncol <- nvar/nrow} # if(is.null(ncol)) {ncol <- ceiling(nvar/nsize )} else {nrow <- nvar/ncol} # # par(mfrow=c(nrow,ncol)) #set new graphic parameters # #par(mar=mar) # # for (i in 1:nvar){ # # mp =barplot(x[i,],...) # # } # on.exit(par(old.par)) # }psych/R/draw.tetra.R0000644000176200001440000001247713303262222014005 0ustar liggesusers"draw.tetra" <- function(r,t1,t2,shade=TRUE) { binBvn <- function(rho, rc, cc) { row.cuts <- c(-Inf, rc, Inf) col.cuts <- c(-Inf, cc, Inf) P <- matrix(0, 2, 2) R <- matrix(c(1, rho, rho, 1), 2, 2) for (i in 1:2) { for (j in 1:2) { # P[i, j] <- pmvnorm(lower = c(row.cuts[i], col.cuts[j]), upper = c(row.cuts[i + 1], col.cuts[j + 1]), corr = R) P[i, j] <- sadmvn(lower = c(row.cuts[i], col.cuts[j]), upper = c(row.cuts[i + 1], col.cuts[j + 1]), mean=rep(0,2), varcov = R) } } P } def.par <- par(no.readonly = TRUE) # save default, for resetting... if(missing(r)) r <- .5 if(missing(t1)) t1 <- 1 if(missing(t2)) t2 <- 1 segments = 101 nf <- layout(matrix(c(2,0,1,3),2,2,byrow=TRUE), c(3,1), c(1,3), TRUE) #layout.show(nf) par(mar=c(3,3,1,1)) #x amd y coordinates for an ellipse (code taken from John Fox) angles <- (0:segments) * 2 * pi/segments unit.circle <- cbind(cos(angles), sin(angles)) if (abs(r) > 0) { theta <- sign(r)/sqrt(2)} else {theta = 1/sqrt(2)} shape <- diag(c(sqrt(1 + r), sqrt(1 - r))) %*% matrix(c(theta, theta, -theta, theta), ncol = 2, byrow = TRUE) ellipse <- unit.circle %*% shape x <- t1 y <- t2 xrange <- c(-3,3) yrange <- c(-3,3) xloc = x+(3-x)/2 yloc <- y+ (3-y)/2 plot(x, y, xlim=xrange, ylim=yrange, xlab="X", ylab="Y",type="n") lines(ellipse, type = "l") ellipse3 <- ellipse*3 lines(ellipse3,type="l") abline(v=x) abline(h=y) if(shade) { poly <- matrix(c(rep(t1,segments+1),rep(t2,segments+1)),ncol=2) poly[,1] <- pmax(ellipse3[,1],t1) poly[,2] <- pmax(ellipse3[,2],t2) polygon(poly ,density=10,angle=90) } text(0,0,paste("rho = ",r)) SR <- 1 - pnorm(t1) BR <- 1 - pnorm(t2) #P <- binBvn(r,HR,SR) P <- binBvn(r,t1,t2) ph <- phi(P) text(0,-.3,paste("phi = " ,round(ph,2))) text(xloc,yloc+.1,expression(X > tau)) text(xloc,yloc-.1,expression(Y > Tau)) text(-xloc,yloc+.1,expression( X < tau)) text(-xloc,yloc-.1,expression(Y > Tau)) text(xloc, -yloc + .1,expression( X > tau)) text(xloc,-yloc-.1,expression(Y < Tau)) text(-xloc,-yloc+.1,expression( X < tau)) text(-xloc,-yloc-.1,expression(Y < Tau)) #now the x distribution par(mar=c(0,3,1,1)) curve(dnorm(x),-3,3,axes=FALSE) lines(c(x,x),c(0,dnorm(x))) text(xloc,dnorm(xloc)+.05,expression( X > tau)) text(t1,dnorm(t1) + .05,expression(tau)) if(shade) { xvals <- seq(t1,3,.1) yvals <- dnorm(xvals) polygon(c(xvals,rev(xvals)),c(rep(0,length(xvals)),dnorm(rev(xvals))),density=10,angle=-45) } #and the y distribution par(mar=c(3,0,1,1)) x1 <- seq(-3,3,6/segments) y1 <- dnorm(x1) plot(y1,x1,axes=FALSE,typ="l") lines(c(0,dnorm(y)),c(y,y)) text(.1,yloc,expression( Y > Tau)) text(dnorm(t2)+.05,t2,expression(Tau)) if(shade) { yvals <- seq(t2,3,.02) xvals <- dnorm(yvals) polygon(c(xvals,rev(xvals)),c(yvals,rep(t2,length(xvals))),density=10,angle=45) } #figure out the equivalent phi par(def.par) #reset to the original values P1 <- P P1[2,2] <- P[1,1] P1[1,1] <- P[2,2] colnames(P1) <- c("P.pos","P.neg") rownames(P1) <- c("True.pos","True.neg") return <-(list(P=P1,BR=BR,SR=SR)) } #show the density of a bivariate correlation #adapted from the examples of persp #meant to show how tetrachorics work #8/4/14 draw.cor <- function(r=.5,expand=10,theta=30,phi=30,N=101,nbcol=30,box=TRUE,main="Bivariate density rho = ",cuts=NULL,all=TRUE,ellipses=TRUE,ze=.15) { sigma <- matrix(c(1,r,r,1),2,2) #the covariance matrix x <- seq(-3, 3, length= N) y <- x #f <- function(x, y,sigma=sigma) { r <- dmvnorm(cbind(x,y),sigma=sigma)} f <- function(x, y,sigma=sigma) { r <- dmnorm(cbind(x,y),varcov=sigma)} z <- outer(x,y,f,sigma=sigma) nrz <- nrow(z) ncz <- ncol(z) jet.colors <- colorRampPalette( c("light blue", "red") ) # Generate the desired number of colors from this palette nbcol <- 100 color <- jet.colors(nbcol) color <- jet.colors(nbcol) nbcol <- length(color) #color <- terrain.colors(nbcol) #color <- rainbow(nbcol) # Compute the z-value at the facet centres zfacet <- z[-1, -1] + z[-1, -ncz] + z[-nrz, -1] + z[-nrz, -ncz] # Recode facet z-values into color indices facetcol <- cut(zfacet, nbcol) if(is.null(cuts)) { #the default is to show a correlation pmat <- persp(x, y, z, col = color[facetcol], phi = phi, theta = theta,scale=FALSE,expand=expand,box=box,main=paste(main,r)) } else { if(!all) {z[,y > cuts[2] ]<- NA #just show the lower quarter z[ x > cuts[1], ]<- NA} z[,abs(y - cuts[2]) < 6/(N)] <- 0 #show all four quarters, with a notch in the distributions z[abs(x -cuts[1]) < 6/(N),] <- 0 pmat <- persp(x, y, z, col = color[facetcol], phi = phi, theta = theta,scale=FALSE,expand=expand,box=box,main=paste(main,r)) } if(ellipses) { angles <- (0:N) * 2 * pi/N unit.circle <- cbind(cos(angles), sin(angles)) if (abs(r) > 0) theta1 <- sign(r)/sqrt(2) else theta1 = 1/sqrt(2) shape <- diag(c(sqrt(1 + r), sqrt(1 - r))) %*% matrix(c(theta1, theta1, -theta1, theta1), ncol = 2, byrow = TRUE) ellipse <- unit.circle %*% shape*2 lines (trans3d(ellipse[,1],ellipse[,2],z = ze, pmat = pmat),col = "red", lwd = 2) } #show it again, but without the ellipses pmat <- persp(x, y, z, col = color[facetcol], phi = phi, theta = theta,scale=FALSE,expand=expand,box=box,main=paste(main,r)) } psych/R/fa.diagram.R0000644000176200001440000002471614463261475013743 0ustar liggesusers#factor analysis and sem diagrams #based upon fa.graph with some ideas taken from the diagram and shape packages of Karline Soetaert #version of September 30, 2009 #developed to replace Rgraphviz which is too much of a pain to install #Rgraphviz uses a NEL (Node, Edge, Label) representation while diagram uses a complete linking matrix #thus, I am trying to combine these two approaches #some small revisions, March 2015 to allow cex to be passed to dia.ellipse and dia.rect #revised June, 2020 to use dia.ellipse and dia.rect and dia.arrow in vector mode "fa.diagram" <- function(fa.results,Phi=NULL,fe.results=NULL,sort=TRUE,labels=NULL,cut=.3,simple=TRUE,errors=FALSE,g=FALSE, digits=1,e.size=.05,rsize=.15,side=2,main,cex=NULL,l.cex=NULL,marg=c(.5,.5,1,.5),adj=1,ic=FALSE, ...) { if(length(class(fa.results)) > 1) {if(inherits(fa.results, 'principal')) {pc <- TRUE} else {pc <- FALSE}} else { pc <- FALSE} if(ic) pc <- TRUE old.par<- par(mar=marg) #give the window some narrower margins on.exit(par(old.par)) #set them back col <- c("black","red") if(missing(main)) if(is.null(fe.results)) {if(pc) {main <- "Components Analysis" } else {main <- "Factor Analysis" }} else {main <- "Factor analysis and extension"} if(!is.matrix(fa.results) && !is.null(fa.results$fa) && is.list(fa.results$fa)) fa.results <- fa.results$fa if(is.null(cex)) cex <- 1 if(is.null(l.cex)) l.cex <- 1 #Phi <- NULL #the default case if(sort) { if(g) {temp <- fa.sort(fa.results[,-1]) temp2 <- fa.results[,1] fa.results <- cbind(g=temp2[rownames(temp)],temp) } else {fa.results <- fa.sort(fa.results)} #if we have g loadings, don't sort the entire array if(!is.null(fe.results)) { fe.results <- fa.sort(fe.results)} } if((!is.matrix(fa.results)) && (!is.data.frame(fa.results))) {factors <- as.matrix(fa.results$loadings) if(!is.null(fa.results$Phi)) {Phi <- fa.results$Phi} else { if(!is.null(fa.results$cor)) {Phi<- fa.results$cor} }} else {factors <- fa.results} nvar <- dim(factors)[1] #how many variables? if (is.null(nvar) ){nvar <- length(factors) num.factors <- 1} else { num.factors <- dim(factors)[2]} #first some basic setup parameters nvar <- dim(factors)[1] #how many variables? e.size = e.size*16*cex/nvar if (is.null(nvar) ){nvar <- length(factors) num.factors <- 1} else { num.factors <- dim(factors)[2]} if (is.null(rownames(factors))) {rownames(factors) <- paste("V",1:nvar,sep="") } if (is.null(colnames(factors))) {colnames(factors) <- paste("F",1:num.factors,sep="") } var.rect <- list() fact.rect <- list() max.len <- max(nchar(rownames(factors)))*rsize x.max <- max((nvar+1),6) limx=c(-max.len/2,x.max) n.evar <- 0 if(!is.null(fe.results)) {n.evar <- dim(fe.results$loadings)[1] limy <- c(0,max(nvar+1,n.evar+1))} else { limy=c(0,nvar+1) } top <- max(nvar,n.evar) + 1 # plot(0,type="n",xlim=limx,ylim=limy,asp=1,frame.plot=FALSE,axes=FALSE,ylab="",xlab="",main=main,...) plot(0,type="n",xlim=limx,ylim=limy,frame.plot=FALSE,axes=FALSE,ylab="",xlab="",main=main,...) max.len <- max(strwidth(rownames(factors)),strwidth("abc"))/1.8 #slightly more accurate, but needs to be called after plot is opened limx=c(-max.len/2,x.max) cex <- min(cex,20/x.max) if(g) {left <- .3*x.max #where should the variable boxes go? It depends upon g middle <- .6*x.max gf <- 2 } else {left <- 0 middle <- .5*x.max gf <- 1} for (v in 1:nvar) { var.rect[[v]] <- dia.rect(left,top -v - max(0,n.evar-nvar)/2 ,rownames(factors)[v],xlim=limx,ylim=limy,cex=cex,draw=FALSE,...) #find their locations but don't draw them } all.rects.x <- rep(left,nvar) all.rects.y <- top - 1:nvar - max(0,n.evar-nvar)/2 all.rects.names <- rownames(factors)[1:nvar] dia.rect(all.rects.x, all.rects.y,all.rects.names) #now draw them as a vector f.scale <- (top)/(num.factors+1) f.shift <- max(nvar,n.evar)/num.factors if(g) {fact.rect[[1]] <- dia.ellipse(-max.len/2,top/2,colnames(factors)[1],xlim=limx,ylim=limy,e.size=e.size,cex=cex,...) for (v in 1:nvar) {if(simple && (abs(factors[v,1]) == max(abs(factors[v,])) ) && (abs(factors[v,1]) > cut) | (!simple && (abs(factors[v,1]) > cut))) { dia.arrow(from=fact.rect[[1]],to=var.rect[[v]]$left,labels =round(factors[v,1],digits),col=((sign(factors[v,1])<0) +1),lty=((sign(factors[v,1])<0)+1)) }}} text.values <- list() tv.index <- 1 for (f in gf:num.factors) { #body 34 if (pc) {fact.rect[[f]] <- dia.rect(left+middle,(num.factors+gf-f)*f.scale,colnames(factors)[f],xlim=limx,ylim=limy,cex=cex,draw=FALSE,...) } else {fact.rect[[f]] <- dia.ellipse(left+middle,(num.factors+gf-f)*f.scale,colnames(factors)[f],xlim=limx,ylim=limy,e.size=e.size,cex=cex,draw=FALSE,...)} for (v in 1:nvar) { if(simple && (abs(factors[v,f]) == max(abs(factors[v,])) ) && (abs(factors[v,f]) > cut) | (!simple && (abs(factors[v,f]) > cut))) { if(pc) {text.values[[tv.index]] <- dia.arrow(to=fact.rect[[f]],from=var.rect[[v]]$right,labels =round(factors[v,f],digits),col=((sign(factors[v,f])<0) +1),lty=((sign(factors[v,f])<0)+1),adj=f %% adj ,cex=cex,draw=FALSE) tv.index <- tv.index + 1 } else {text.values[[tv.index]] <- dia.arrow(from=fact.rect[[f]],to=var.rect[[v]]$right,labels =round(factors[v,f],digits),col=((sign(factors[v,f])<0) +1),lty=((sign(factors[v,f])<0)+1),adj=f %% adj +1,cex=cex,draw=FALSE) tv.index <- tv.index + 1 } } } } # if(!is.null(fe.results)) { #this messes up the multidraw functions # e.loadings <- fe.results$loadings # # for (v in 1:n.evar) { # var.rect[[v]] <- dia.rect(x.max,top-v-max(0,nvar-n.evar)/2,rownames(e.loadings)[v],xlim=limx,ylim=limy,cex=cex,draw=FALSE,...) # for(f in 1:num.factors) { # if(simple && (abs(e.loadings[v,f]) == max(abs(e.loadings[v,])) ) && (abs(e.loadings[v,f]) > cut) | (!simple && (abs(e.loadings[v,f]) > cut))) { # dia.arrow(from=fact.rect[[f]],to=var.rect[[v]]$left,labels =round(e.loadings[v,f],digits),col=((sign(e.loadings[v,f])<0) +1),lty=((sign(e.loadings[v,f])<0)+1),adj=f %% adj +1, draw=FALSE)} # } # } # # } #now draw all the factors/components and their associated loadings #first, just the factors and their labels tv <- matrix(unlist(fact.rect),nrow=num.factors,byrow=TRUE) #unfortunately, this loses whether it is an elipse or not all.rects.x <- tv[,5] #the center of the figure all.rects.y <- tv[,2] all.rects.names <- colnames(factors) if(pc) { dia.rect(all.rects.x, all.rects.y,all.rects.names)} else {dia.multi.ellipse(all.rects.x,all.rects.y,all.rects.names,e.size=e.size) } #we need to remember whether to draw ellipses or boxes #now show all the loadings tv <- matrix(unlist(text.values),byrow=TRUE,ncol=21) #text(tv[,1],tv[,2],tv[,3],cex=tv[,5]) text(tv[,1],tv[,2],tv[,3],cex=l.cex) arrows(x0=tv[,6],y0=tv[,7],x1=tv[,8],y1=tv[,9],length=tv[1,10],angle=tv[1,11],code=1,col=tv[,20],lty=tv[,21]) arrows(x0=tv[,13],y0=tv[,14],x1=tv[,15],y1=tv[,16],length=tv[1,17],angle=tv[1,18],code=2,col=tv[,20],lty=tv[,21]) if(!is.null(Phi) && (ncol(Phi) >1)) { curve.list <- list() for (i in 2:num.factors) { for (j in 1:(i-1)) { if(abs(Phi[i,j]) > cut) { # dia.curve(from=c(x.max-2+ e.size*nvar,(num.factors+1-i)*f.scale),to=c(x.max -2+ e.size*nvar,(num.factors+1-j)*f.scale),labels=round(Phi[i,j],digits),scale=(i-j),...)} d.curve <- dia.curved.arrow(from=fact.rect[[j]]$right,to=fact.rect[[i]]$right,labels=round(Phi[i,j],digits),scale=(i-j),draw=FALSE,cex=cex,l.cex=l.cex,...) curve.list <- c(curve.list,d.curve) } } } multi.curved.arrow(curve.list,l.cex,...) } self.list <- list() if (errors) {for (v in 1:nvar) { d.self <- dia.self(location=var.rect[[v]],scale=.5,side=side) self.list <- c(self.list,d.self) } } if(length(self.list) > 0) multi.self(self.list) if(!is.null(fe.results)) { e.loadings <- fe.results$loadings for (v in 1:n.evar) { var.rect[[v]] <- dia.rect(x.max,top-v-max(0,nvar-n.evar)/2,rownames(e.loadings)[v],xlim=limx,ylim=limy,cex=cex,...) for(f in 1:num.factors) { if(simple && (abs(e.loadings[v,f]) == max(abs(e.loadings[v,])) ) && (abs(e.loadings[v,f]) > cut) | (!simple && (abs(e.loadings[v,f]) > cut))) { dia.arrow(from=fact.rect[[f]],to=var.rect[[v]]$left,labels =round(e.loadings[v,f],digits),col=((sign(e.loadings[v,f])<0) +1),lty=((sign(e.loadings[v,f])<0)+1),adj=f %% adj +1)} } } } } #draw a heterarchy diagram "het.diagram" <- function(r,levels,cut=.3,digits=2,both=TRUE,main="Heterarchy diagram",l.cex ,gap.size,...) { col = c("black","red") nlevels <- length(levels) if(missing(gap.size)) gap.size <- .4/nlevels if(missing(l.cex)) l.cex <- 1 nvar <- max(unlist(lapply(levels,length))) lowest <- rownames(r)[levels[[1]]] xlim <- c(-.25,(nlevels-.75)) ylim <- c(.25,nvar) plot(0,type="n",frame.plot=FALSE,axes=FALSE,xlim=xlim,ylim=ylim,ylab="",xlab="",main=main,...) x <- 0 #first draw the left most layer if(!is.null(names(levels))) {text(x,nvar,names(levels)[x+1]) } nlevel.i <- length(levels[[1]]) lower <- list(nlevel.i) spacing <- (nvar)/(nlevel.i+1) for (y in 1:nlevel.i) { lower[[y]] <- dia.rect(x,y*spacing,lowest[y],xlim=xlim,ylim=ylim,...) } names.i <- lowest #now repeat for each higher layer for(i in 2:(nlevels)){ nlevel.i <- length(levels[[i]]) level.i <- list(nlevel.i) names.next <- rownames(r)[levels[[i]]] x <- i-1 if(!is.null(names(levels))) {text(x,nvar,names(levels)[i]) } spacing <- (nvar)/(nlevel.i+1) for(y in 1:nlevel.i) { level.i[[y]] <- dia.rect(x,y*spacing,rownames(r)[levels[[i]][y]],xlim=xlim,ylim=ylim,...) for(j in 1:length(levels[[i-1]])) { if(abs(r[names.i[j],names.next[y]]) > cut) { dia.arrow(from=level.i[[y]]$left, to=lower[[j]]$right, labels=round(r[names.i[j],names.next[y]],digits),both=both,adj= y %%3 + 1, col=(sign(r[names.i[j],names.next[y]] < 0) +1),lty=(sign(r[names.i[j],names.next[y]] < 0)+1),l.cex = l.cex,gap.size=gap.size,...) } } } #end of drawing the factors for this level names.i <- names.next lower <-level.i } #end of levels (i) loop } psych/R/meta.schmid.R0000644000176200001440000000022511422752643014125 0ustar liggesusers"meta.schmid" <- function(L = NULL,L2=NULL,g3 =NULL,m=NULL) { Vt = sum(m) g <- L %*% L2 omega <- sum(g)^2/Vt result <- list(g,omega) return(result) }psych/R/toDotty.R0000644000176200001440000000046012456461256013405 0ustar liggesusers"toDotty" <- function(graph,filename,...) { if(!requireNamespace('Rgraphviz') && !requireNamespace(graph)) {stop("You have called a function requiring Rgraphviz and it is not installed. Install it and try again.") toDot <- function() {} #dummy function} else { Rgraphviz::toDot(graph,filename,...)} }psych/MD50000644000176200001440000005701614552221456011727 0ustar liggesusers8af07087e8210dff4263c70da4b1656c *DESCRIPTION 3b3d5e3a60f73fdc4789742318e00085 *NAMESPACE 13d9ab3fbb68605e336eaf29f98bd80a *R/BASS.R 8b87ef0a34c35ff30e98461c58265fb6 *R/ICC.R 0b7d8585ba23fe65a7ebcaf424c7ee2d *R/ICLUST.R ecaf6a79e563895c5b3859626c9adb86 *R/ICLUST.cluster.R 0078b0c55040f1631b5df22fbf96f24f *R/ICLUST.diagram.R 4e7420bb6217b6c46af5e2b341829664 *R/ICLUST.graph.R 47707adf3dd0a492c8381d87edfc2f25 *R/ICLUST.rgraph.R 43304e73cf7661a6cbfcb4f93f85729f *R/ICLUST.sort.R 2937f4c9a5cb634466837bca10d922c2 *R/Pinv.R 07845d2066c0dc7caf3654ff159552d4 *R/Promax.R 29b1741482dfa3214d2653e3470b3521 *R/SD.R 4f806943542b5e8ffafcf0ef2e63c484 *R/VSS.R 809456b6ac0ee3a8dfcc465dca04666f *R/VSS.parallel.R f8d7090c374a6f5d7f3e9bb093ffcd0d *R/VSS.plot.R ee83d3685618729f4a809479142f0ba2 *R/VSS.scree.R ed75be842790e7dfa77a1c535522273e *R/VSS.simulate.R de88fdb63bc08e5bfd4fe85ae4336ee6 *R/VSSem.R bd000d60c462cd306f7baf4baaa06950 *R/Yule.R e5c117032ae3d543420b212080570e8a *R/alpha.R 829a79ef9b5afa968036507b0acc9f1d *R/alpha.scale.R 3a2d5104157faaa639bc9fbc8f76bd7e *R/anova.psych.R 1f552f1bf78cc51f508c426d222713cf *R/auc.r dbe8b4d51da8a5a9284b23189daf6817 *R/bassAckward.R bce488643a51d1235d450871a329a9cb *R/bassAckward.diagram.r a9de1aec1a83f8fd77ec47f6b5e7523f *R/bestItems.r e6c816b33a5ce3b6c17195a90d559aa4 *R/bestScale.R b88a28ba09b081eb65155dee086fbb50 *R/bi.bars.R cad6f85a563d6cd28602362ed9baae0f *R/bifactor.R fa0d460e78e494e931289cf9203d3313 *R/bigCor.R 48212810f408eccb060652e0603733b1 *R/biplot.psych.R 0db3102d1d2a94f3ca64b47b65d0460f *R/block.random.R 1692ff58bc2e3a8fe7f0a31194d6ca1d *R/circ.sim.R ae8431c26cfba1e7680f174f215ddb3f *R/circ.sim.plot.R 1cda4a1ca1bc9b6e9a1965ec006e7169 *R/circ.simulation.R e8cde9d21cf4a426abb5c0f441c499f2 *R/circ.tests.R 34543cb45a3daee1c90291b4dcfaec37 *R/cluster.cor.R e1e60b3b6603b90d5315fbb62d1b400d *R/cluster.fit.R f1d9b87a04ccea8081dc082e62b87c78 *R/cluster.loadings.R fa5930cfcdb94f5ec8d9ac29136eceff *R/cluster.plot.R 861e6e0682d9d30322553d81ad283de4 *R/cluster2keys.R 8a991c4238614428a2977ae10f9b8103 *R/cohen.d.R 727ee779cba4b35601c0f67761ccf256 *R/comorbidity.R 02f5c23520d04eeff50459773faba599 *R/congeneric.sim.R 02f5c23520d04eeff50459773faba599 *R/congeric.sim.R 6ac21fbf5958af22e80a0128ce23efb3 *R/cor.ci.R 9c8453cf2b7541c4edf2f768d59f65df *R/cor.plot.R fd74b129ae2efec3e186e5d597bfbdea *R/cor2dist.R 90b4ae9a438ce547a41eee5a5194931c *R/corr.test.R fbbad89fd3f99a771eaaf3f356dcb9aa *R/correct.cor.R 511855e95ce46ef489dac8111a9c1dda *R/cortest.R fe8acb2f96feda0a660696a39ccd4447 *R/cortest.bartlett.R 895a69392b4f32bea94ab692126c83a4 *R/cortest.jennrich.R 6f10daea428ad8d9cd9457cd7ff8ca6c *R/cortest.mat.R 122ba6363838778649d84d70633f8dfd *R/cortest.normal.R a219142cce6bed487270f513968da1a3 *R/cosinor.R 97e47a24c095cb4b8b0524c1b9592e1d *R/count.pairwise.R 3347ba1f87c8dc33066be1a0dbd4c63e *R/crossValidation.r c630241141d68bc1c74b996319f96caa *R/cta.15.R 55da70d9c9bec31f46ff865582d29dc6 *R/cta.R 4d55e171e817dfb445a5c24d787acc29 *R/densityBy.r c008507c9fb46b61236fa66447c7898c *R/describe.R 45d3c0b76e31d438b398a67f2f1f721d *R/describe.by.R f68a01d91b2969554f3f7cb20d2dc134 *R/dia.arrow.r fd3fc64eaafbfb3771f597a51647b779 *R/dia.cone.R 8e559c67af21f36f3df4248e5497f68a *R/diagram.R 6f494a8655dd977c0a8cc51e82ecbe4c *R/direct.sl.R f2f4b8e0dd4d75afb4d86b598556dbea *R/draw.tetra.R 9d742934360e15147babad6dce5c5917 *R/dummy.code.R e7df2e7682994996c551c62ed0ffef97 *R/eigen.loadings.R b6e45d9e7f8a43ffab32363c88b9cbeb *R/ellipses.R 146c39385718e0b8f36f7772786ebb0d *R/error.bars.R b6346c2752330f2797ffd7ef1b4fc167 *R/error.bars.by.R cb63d95b6898016dd1867dc6e1bfb19e *R/error.crosses.R 5d54b89f41f54e9e7c00629ea8cf0f99 *R/error.dots.r 5296d038f210b0abacb326c842b8585e *R/errorCircles.r 55249417b9a430b24ac2db24518d2027 *R/esem.R 629ef2f9b07ac74ed565584a8de8fe27 *R/esem.diagram.R 1974d73fde0120a5ceca0ccfdb6d36e2 *R/extension.diagram.r 89a9d0ee2f846fc6b3687d575a2d99c7 *R/fa.R 447725ed3e1eca5f5f612c4bd1ef5a87 *R/fa.ci.R 625a5d0be816031e4b215eb37060226b *R/fa.diagram.R 8164716c93068c5dc354acf7b2725c02 *R/fa.extension.R b2f045a51110e376b73a021035d02033 *R/fa.graph.R c7ecd25c15ab5b8885b822ba50b15df8 *R/fa.multi.R e1c9d671d1c15c9b4cc4b3022fbd43cc *R/fa.parallel.R 369d295a92258e0a7faa9d934c4108d8 *R/fa.parallel.poly.R 1a4d7a711aca8c2cce59e9c58a0d0734 *R/fa.poly.R 0ebf383230c63013e156cd972f7d401d *R/fa.pooled.r 5ac4f5a2056cec2e4d15cdcfb17e3973 *R/fa.random.R 4af9392ce59f97081bea4daffdeeb924 *R/fa.rgraph.R 19473af1b7a1b25858d70bacf3d43ef1 *R/fa.sapa.R ca209aacc42c1f6fcbc69946d882dd6c *R/fa.sort.R ff1bf183f5dc2553404bf7fbf3ed8caa *R/faBy.R c964c48615c77c560f4e4f1f1411c1f0 *R/faCor.R f0f3e646b8ad5d9e7552bfab70f5cded *R/faReg.R a3290736465c25e3bb5e6927a642acdc *R/faRotate.r 21dc62f739854a8580719efd19ccfc5d *R/factor.congruence.R c851365f569d8fc0194bd0f6840529ff *R/factor.fit.R fc8fc1ecb18a85b3d810cbbdc7f69b38 *R/factor.minres.R 963be9f7f5143a09871c23b9a5044811 *R/factor.model.R 1930bb013a3d24a79719943f4733bcd5 *R/factor.pa.R aba30c0d13758979f8e600997d2e15b5 *R/factor.residuals.R fd8664c63e1e9c9c88fb504729a3bb32 *R/factor.rotate.R 35e6b2509117f9b8d7b389116b5fa7cc *R/factor.scores.R a224748bedc5abe7243de0d15db16981 *R/factor.stats.R 07305db6788f548ddfd81cef02f33b96 *R/factor.wls.R be81c3ba9fc5c603b031643cf90d907f *R/factor2cluster.R e10754196a212278bb50b3f669a3705e *R/fiml.R 4582be0ba4e7ace62870776df2cbc2a5 *R/fisherz.R 59a411c575963c65934529a2db1c589f *R/fix.dplyr.R d9cc5a54eb83059346f5f85cbeffdfb0 *R/geometric.mean.R 47fa5ab9e517de4e04419ffb8c32b91b *R/glb.R 3d45734b104d5337b15156483d7dbcba *R/glb.algebraic.R a158253d5a9c8008adb6f1a672bb1b43 *R/glbs.R 974fd71977dcdce0d542d24341ba7a3f *R/grouplag.R 1278e672ea0a5e11fa4e271030ff07d8 *R/guttman.R 2aec632103446472fa8c1c2f64149ae6 *R/harmonic.mean.R 99fd849f0e4f104e9f2ca8f9ea537acf *R/headtail.R 4fc7002c528f94ae3e3f58c9fd052edd *R/imputedR.r 11b91d0af2fb0086bc125713c1cc44f9 *R/interp.median.R ec5fbe362760a0d33e9e6cd837d2b3fd *R/irt.0p.R 1db07f94eb04b7e5f3b5e792a943b7b9 *R/irt.1p.R c3b63762311bb7ccf7851794ce4677ec *R/irt.2p.R 074d8e8fb33b28f1e4907b9aa6571468 *R/irt.discrim.R 054712cd453a3add4d3defdc893eaaaa *R/irt.fa.R 7a5fed074edc954eb4f9b70abfc56a23 *R/irt.item.diff.rasch.R 88d04add3789ee69a16312fed996cb63 *R/irt.person.rasch.R 3c2b15eae84f7e8c49515174ef5a1ce7 *R/item.dichot.R b8c0b2d89d5c89e4b32839e8b08fb61c *R/item.sim.R fd2b59d29361b2207859bf27a685e678 *R/kaiser.R 871ccd5d2c6ffae42090933a1144b486 *R/kappa.R 83b480391c2e60a58baf7ec1ed3df258 *R/lavaan.diagram.R bfa2ac8223e4963a82c4df8f6236c0fd *R/lmCor.R c8946903e3e54166eaade7ec8455d0dd *R/logistic.R 4738482f37b63db58477f8fd59a44bb1 *R/lowerUpper.R 6789c9df042a9a5c8a5eea23307dbe5f *R/make.congeneric.R 9fe4e9d266eda349572ebcef3b0a87c4 *R/make.hierarchical.R 57bccbaf3cbd2a46cb95149ee99119a5 *R/make.keys.R 3b1f25cd6bd4154e82d045093b20e1e4 *R/makerepeated.r 2381b4a824b5addac6f182d3735a73e8 *R/manhattan.R bcd98bc6e3255a86ddab0f1739851dac *R/mat.plot.R fce5a9f400c77081edc7e080ce5f7dab *R/mat.regress.R 659ae5887c1eee00f6d81982bc551cc4 *R/mat.sort.R ebfb848141f2e063822863870429a8eb *R/mat.sqrt.R b2ae54c2687563c2117e034c3adfe564 *R/matReg.r 2cef0a7e4980b9f150a88ad4089cc4fd *R/matrix.addition.R e62d996f401c95d98fc5eee0aca230ae *R/matrix.na.R 8cf78d71118751d7235a85ed183ddcdf *R/mediate.r 7efd423b076037ac3ef6b4fa9e984f55 *R/meta.schmid.R 4aa368a82f65a62d6ca324ddb807995e *R/misc.R ece308ca3ee5ea3232d20c93e81875ad *R/missing.cor.R d65a1a877f1bc255b31bfafa3676322e *R/mixed.cor.R e4367b44eed40bdb228d45e126b60ecf *R/multi.hist.R 714227d34f995a0b2b9b02ca40102db8 *R/multilevel.reliability.R 94b68f3aacdad02bc1ea1c0b8cc9dc83 *R/omega.R 16cd06a657e0d115dc7b031cf4440ef3 *R/omega.bifactor.R 15f3f5ca7ae16424a394341b235dbce4 *R/omega.diagram.R 78f13db07b78bef2ed6e956faab716d1 *R/omega.graph.R 711f13fcb69b9474586eaffc00b0ce61 *R/omega.sem.R d6911a9020cf281c24aa940d21e62d9d *R/omegaSem.R f8e38cba148b68289dc2327e65d95e19 *R/outlier.R 9b8b3f061113bd944d5719d2e1dead2d *R/p.rep.R c5ddd08c4f4f9fc036462f00606f2835 *R/paired.r.R 69a600a0819a1867b4bcaa65869428e8 *R/pairs.panels.R be8630a3d38b4799174096584720521a *R/parallel.irt.r 1f3d22b0be091a98cae6e0111f893839 *R/parcels.R f4e48056c87355534c774b4f7d52cc68 *R/parse.R 78b24949ca135f6cf49dbef39648659e *R/partial.r.R 611567470605e74b7842bfe152de6b38 *R/phi.R a5ae7331407fdc040d03c90fa7bef381 *R/phi.demo.R 0bdb825c5c742af73fcb1326c0854368 *R/phi2poly.R 69a95741e9f0aedc30fedff81b4d2154 *R/plot.irt.R ec78fb1d1de022a975b89a6fa77b13f4 *R/plot.psych.R 16ae5b608404974f0e551db96d277e6d *R/plot.reliability.r fc8509441060138662879d1433dc1c28 *R/polar.R 8b7c8a676b41c656c541740af079b707 *R/polychor.matrix.R ddbf6c1f4c5314251442aaab70091901 *R/polychoric.R 6b3df3fd5ca02ffe702a6e902b9c87cb *R/predict.principal.R 29c9a3177a7bd66421a1401fc364a915 *R/principal.R 0c7add38a67a65a9e77ce9251a470834 *R/print.factor.pa.R 94081b86614fe590e019909b662ea786 *R/print.psych.R 0151982000b174a3ce0f2e300d2b977c *R/print.psych.fa.R 17c83e8fc297c4a228f72d76063e4559 *R/print.psych.iclust.R de57adccdf4c6e5cd152b441e5dbdf9f *R/print.psych.mediate.R 126f0280715ed6f1c7c29a46832735a7 *R/print.psych.omega.R e62bb9ce4f598699535e4ae95bc9e898 *R/print.psych.schmid.R bd844d2f3782f1d546db9387f3c050f9 *R/print.psych.stats.R 8d32b132290968175212c4d4318f0c8f *R/print.psych.vss.R af44fccce9d1895e8b5f0199da865bf2 *R/psych.R 6fc6dd6ef45ec8e243af07e42e82ae38 *R/r.con.R 8ff6bc06c93049ead62c86d591cf8dea *R/r.test.R 641538767ad214fefdbe0a1ffe57291c *R/reliability.R 7db59751de495a88cf82ab885b864cbc *R/rescale.R 6832a93e284d8f36afe5ed245f73b978 *R/residuals.psych.R 5ceb86db3036dbe92fd153073c615d59 *R/reverse.code.R d4be2ecb1314c8994b3b161cea8268c1 *R/reverseKey.R 33993e4822f0c5214b481df0b63c1502 *R/rmsea.R 87fd7c4266c93d1588d37183964e0ce1 *R/scaling.fits.R 3db0878d75c9796740232b6ecfe82dc6 *R/scatter.hist.R ea15bb38b33ed29aeba04e0307f37e8e *R/schmid.R a8e68675f21774507cf9526bc25fc454 *R/score.alpha.r 0d7221b7fbe80e5bfc8e6e66dafbb6f6 *R/score.irt.r 3227e30a3358ec3a647656aa43e94a4d *R/score.items.R ea10c36dbb3b79ee6c16d197a719764b *R/score.multiple.choice.R 479e5b2782599862ecc57c1d1b1aa793 *R/scoreBy.r 33ac8532e26ce775ce6f3d2e0b839684 *R/scoreFast.r 1766a8e4d64f01da1efba1b3a86d9efe *R/scoreOverlap.r 660a1b78df1f9950e6cb740579411eab *R/scoreWtd.R 9679b2fbe80d61f6fd6907574bdff868 *R/sdt.R 7cf7707da2889f9c82df83ecc0986fa2 *R/sim.VSS.R 27fac5d60d9340425b20ad27e3a5f604 *R/sim.anova.R a010402da6c59b4470523239653cf2a0 *R/sim.circ.R 12abc7b3347f3b46a5303fb7ef2c4042 *R/sim.congeneric.R 44e7a2f31b5364cd74e8f454ed8ee658 *R/sim.dichot.R 8f1618af41477c99bfae2d1ed2f1d5c1 *R/sim.hierarchical.R 78f545b254a8af24f7e974f3808b2d88 *R/sim.item.R 20cbae4218311559877c3715907530dc *R/sim.multilevel.R 8945e972247708b9a694b6b070dc8144 *R/sim.parallel.r 9c91f93d4efa32d3d92c8ba08ca6b7fd *R/sim.structural.R abbb79a76cd0213dab6f3a29e59d963f *R/simCor.r 1304546b938c055e8d73e20892a8b1a1 *R/simGene.R eecef57c490d4e5c7493b41c46f1874b *R/simulation.circ.R 7a529c97ccaf1ae232c4da52a9cfaff0 *R/skew.R b175b9f540ef2e25336ba5d51b5df91c *R/slipHalf.R 92653249587ab2eec617de48acd20689 *R/smc.R 6a9a3a45a6cd73860288cd8e4c4302a4 *R/spider.R 9a1689f5beb09ec27f2aec8ee38648eb *R/splitHalf.R 7e10359835c0fc0d2539de0d42bd850f *R/stats.by.r 83ea3a47f741244a0b83f0ba773e0996 *R/statsBy.boot.R 1abaadd29a04955b9e4577e3d3dd5a49 *R/statsBy.r 2ab8f84ff2137b01be2e82f27d16d2c1 *R/structure.diagram.R b676fed8b8ff2d810658ee1c035791c0 *R/structure.graph.R 8a9cccb7dac465fb4e574aac613eb413 *R/structure.list.R a53c337ffd04a7ab45e46ea6084d73d6 *R/structure.sem.R 923a1bd22fe71061396b2b15a7c9db29 *R/summary.psych.R 7e18539945edb5f2ee975b534c4ff9a0 *R/superMatrix.R 12e0ae0408553ceb747326d10541d6de *R/table2matrix.R 567997e82f78f065cf0e41820cd1200a *R/target.rot.R d749df7eb65ddfd8050bd0b676665366 *R/tenberge.R bc3fced45a4ac8103d9df50cbea9cfe2 *R/test.all.R 552f7f184c3fdc1a64f561586a2e3014 *R/test.psych.r ec5cfcac7d494b2034f754e9b9976c1a *R/testRetest.r 3d3162c8d6d1187731180349823b1e17 *R/tetrachor.R ac5c588827e2b79c3b685c74f4f10281 *R/thurstone.R e70757153ddccbd41bb360dc33db8c52 *R/toDotty.R 5c81c90d559ddd61d944032f33a46c0a *R/unidim.r 06d94c969445422b79d2db3407ed19eb *R/validity.R d49d4eb35da9e11010df63cb6b0aacb8 *R/winsor.R 8faf250d78a120ba4c7ea2186efddd24 *build/partial.rdb 99f2a0dd7d05b620fb6e38accc93a576 *build/vignette.rds b448c67916d03c7e63fcdb984133b1c0 *data/Bechtoldt.1.rda 3c1e24d0897e155babfa9494ec271d6b *data/Bechtoldt.2.rda 4b8f9be884c4c128296d882c00c9eec6 *data/Bechtoldt.rda 23bd1e4540511207f0ab6b5eb537f5ed *data/Dwyer.rda 79422761eafb2ffabe9cae11ae825bff *data/GSBE.rda 0cbf35e1c57054d07c328f4299f03549 *data/Gleser.rda e9217fc1603d923e1ec47924a08a41a0 *data/Gorsuch.rda e6978feacaf4b5dc003f48efd448f7ab *data/Harman.5.rda 4ed269727be020fd2903b875eb8c7ba6 *data/Harman.8.rda 0cacb5fbdf03c2de6f6a669c51f653b1 *data/Harman.political.rda 6c0df8ca3bb3bf4f2ed0d5a5bfed2238 *data/Harman.rda 2f19b00823191e03ebe20c44b7a48454 *data/Holzinger.9.rda f8f687eb4c9f8ac5fba06c2d87c7ccb5 *data/Holzinger.rda 2d6d72563d5692cb7b396d4bc5bd5db8 *data/Reise.rda ad5359db496b5acb0d21e72efc083dcc *data/Schmid.rda d9eacd12addc4ac3d82a76f6bb766b26 *data/Tal.Or.rda f8011257885eaf2d1cf4471beb3bf12b *data/Thurstone.33.rda 229e72d3e655bab2d14496b2a2cd7971 *data/Thurstone.33G.rda caa6f0c510638df36401fbb9d408786e *data/Thurstone.9.rda bcc9317a04ec8c6e27a52b2587f240e8 *data/Thurstone.rda e8802670dcc270b47bbd7d244dbe048b *data/Tucker.rda cee3e1077cd667577bee5f03f9852093 *data/bfi.rda b31be1785c2c0b0c79b6d10858ee52a8 *data/bock.rda e4d1ce3c98c451afe63b70e5e894b1c0 *data/cattell.rda 750cfdfc8737a90915fd746403696f5d *data/sat.act.rda eef271883aaafe139c572158fde02134 *data/tal_or.rda 16da6b1e624fce7a452326bfab7d390d *data/withinBetween.rda 62500fd50872a62bf718a03a9b893a42 *inst/CITATION e793deb67a02c404aa3516bbf6026424 *inst/NEWS.Rd 2ae96912891fe51c00d72afa3d1a6264 *inst/doc/scoring.R 8bf9d8c6019518573c9584228267203a *inst/doc/scoring.Rnw 56f58f4b81ff153a8f60500ec1f42da7 *inst/doc/scoring.pdf 7d20d6c5445e72fcb4ff2871d2f86bf1 *man/00.psych-package.Rd 8fed7b2b89b7bb8b48d111f3cdcec44c *man/AUC.Rd 566f1e936716b525e1597b6cdfafec13 *man/Garcia.Rd d17cc6e8c6a59208a5af65de5fa47ab7 *man/Gleser.Rd d1c648092201e2fd2185c5aa2bdd2d9b *man/Gorsuch.Rd 594ede8fb88a4c7f06130da4519b8dfc *man/Harman.Rd ad90683c20920400bc8258791e44af30 *man/ICC.Rd 5e028c29f7fb5e49b3f7055b4ce27163 *man/ICLUST.Rd d156c747dde236787e75192bef7efe8a *man/ICLUST.cluster.Rd a5edfa958431c01cb15b9452565fa9ba *man/ICLUST.graph.Rd b1584182ce8a48713a69ba3081f68e4e *man/ICLUST.rgraph.Rd e0d42c620acfbaafbd51120fc5776a85 *man/ICLUST.sort.Rd 99c455df708d18d6f1c6665d500091f9 *man/KMO.Rd 7edfe486ad5526dd71507466dc05c039 *man/Pinv.Rd 39d4e1c81fa06826c02e559fd1e695e0 *man/Promax.Rd 0dd624ebffa5498bdb2f6f2d60799953 *man/RMSEA.Rd b0247086c74b2cd4f5a1abf93eb3cf28 *man/SD.Rd 6ed5ee33a84ddd70ba74973fd791e1ca *man/Schmid.Leiman.Rd 15027d277a0450ca0d5424e5d088961f *man/Tucker.Rd 0760fdf7b329ffa86c9863e0bd4e9103 *man/VSS.Rd d79dd6358a3840e4e06171581e0cff76 *man/VSS.parallel.Rd 91010e4353f7b1d70a0c9522ee2c54f5 *man/VSS.plot.Rd 720f2d3b1da266b2bbceade3c5f71eae *man/VSS.scree.Rd a2f4c2247cdfeaa4ead1e4f3459dc9b9 *man/Yule.Rd d9dd7d724664a9848aa183adf8dcef16 *man/alpha.Rd 480047a987ba40682abadf6289741138 *man/anova.psych.Rd 996ffff4f7f399e83b386c41f78f7612 *man/bassAckward.Rd e2e36fc2658c85ed6dbed183a58dd20e *man/best.scales.Rd 851d8b17454811ecdef69f639cf1c891 *man/bfi.Rd d69e7307a36b6848b971c199b2a16652 *man/bi.bars.Rd 77a1b9a7f67b4bb453b1d2a96d159b16 *man/bifactor.Rd 7fb22113bea405e7c3315ed222e110ed *man/bigCor.Rd e57d9b06d25645e1cbc40a9c0ac3855b *man/biplot.psych.Rd 19fd0187717edf07ce10d823604ab5f1 *man/block.random.Rd 7df897b13c377a54584eb551881587ba *man/bock.table.Rd 215ba4f0ea880597a98582c3f0806876 *man/cattell.Rd 00a36e9e71e11e29cedd43f0d69a7dfb *man/circ.tests.Rd e8a43f2f5538e69cd928f4decb6fbb07 *man/cluster.cor.Rd cbfc59b49670ea02e90b45dca2d65657 *man/cluster.fit.Rd a5da3d7d71594147fc2ec133db25ad60 *man/cluster.loadings.Rd 6e0e6f1f89a2bbae8a202e923b904397 *man/cluster.plot.Rd 8a2b12d771d57672b7abe446e68b058c *man/cluster2keys.Rd 9da550b28b856d7feda9cc4dff0d6134 *man/cohen.d.Rd cc731a9871fee22ff7789011d71c716a *man/comorbidity.Rd 0a6fceafd7ac147a82428134c3f1e63b *man/congruence.Rd fb2c703371dc18cd114fcbf07899766b *man/cor.ci.Rd 93218eb6a88c0c019a8f5118e82f55c5 *man/cor.plot.Rd 8682d3cba4da44fd878aa25c6f084c22 *man/cor.smooth.Rd 695bb5f5976eec427b24dc2e5cef1067 *man/cor.wt.Rd ef02a403567a3dacf73957a4400ec414 *man/cor2dist.Rd c69c7082668301ee6fce0f18df924d53 *man/corFiml.Rd aa007c9c13aa27fd6852f33f1e7cb76b *man/corr.test.Rd 59d66d193279401d8baf890cb64f0cb6 *man/correct.cor.Rd f6120ed02c2f4e47d89b03d5b71cc200 *man/cortest.bartlett.Rd bf5217fee116390ed00c7cef67577e75 *man/cortest.mat.Rd ab32be387f7c3dc25dfc04b48d4b06a7 *man/cosinor.Rd 9b4297f0e18dd7ce606509d71f5a0ffa *man/count.pairwise.Rd a0b5ef53382057e52c42e46f868929f4 *man/cta.Rd 4e6e6c00c3ff096daba413bbe0e9e903 *man/densityBy.Rd 89b88eacec0f8473a15a05217c514b66 *man/deprecated.Rd 31d76203f8899cdfc66534694b319d9e *man/describe.Rd 0a0d5999324426d712b25da62ad768b4 *man/describe.by.Rd 5ea3d06257d1fa929047e744f8f005b2 *man/diagram.Rd c3ccc2c163b6b0f5f7ae4ab58227d048 *man/draw.tetra.Rd 42eb988a3461cb9dcb67c5f6d74b3104 *man/dummy.code.Rd 4f2ffa5d5cf2e40d2b3f8b57448d5673 *man/dwyer.Rd 32e59842a63fdbd8744fe812278c9d48 *man/eigen.loadings.Rd 292610aade8c543e29f3bf38c44b0e37 *man/ellipses.Rd 2b70abda3b9ada18c3ce9abc329970ba *man/error.bars.Rd 2afd4f3fb9e66813ef584d55e795a525 *man/error.bars.by.Rd eab1274c45abff6c0518b0c083a796d5 *man/error.circles.Rd 9f5725ab170df7786b3e9924cb906693 *man/error.crosses.Rd d7610b9d05dd1aa6419a6fd5222593f2 *man/error.dots.Rd 210d04243f17de0d1598407af42c0d30 *man/esem.Rd bcb01a857a8c3fae267facb0c11f26e2 *man/fa.Rd 54807b246a6afbee9463d092f43c1c3d *man/fa.diagram.Rd 0b25dfced3f365a2e8507c270b561520 *man/fa.extension.Rd 3ab83890687d637436b9568e9865d901 *man/fa.lookup.Rd fa4830b16f41fd0d7b523e3c1c210b9e *man/fa.parallel.Rd 580728e13db10ed6e9d76ce1c0259474 *man/fa.random.Rd 8927e72b893c24b476459f561e2fbeee *man/fa.sort.Rd 955b3b5d3c1c2eaa82a16520b3ea5c1c *man/faCor.Rd c6de06a3501a5014ac2cc952fbdbf397 *man/faMulti.Rd c21af7eef085a380d49d90ac573cedb5 *man/faRotate.Rd 02e3d92c488c16130a70db16fc0c425e *man/factor.congruence.Rd b70af33e70ef263acdba49a6f70db2cb *man/factor.fit.Rd 344204b10c745f704daa2ca6c80d2031 *man/factor.model.Rd 3047cade6281ca00132ff92bd3ee2dba *man/factor.residuals.Rd b24783202901707d7a86ce0b884b4cb2 *man/factor.rotate.Rd cf745b2abd1e313c60fc2674501db0da *man/factor.scores.Rd 5984833c8be3ffe16e283c8b4435a836 *man/factor.stats.Rd 2811737bd7e12e91b9868eadf816a106 *man/factor2cluster.Rd bc73c0a5b52f89e486834f0376f53575 *man/fisherz.Rd daa6e4a18d4c4bb436fbb70624844013 *man/fparse.Rd f29e3fb2b2794c45b46eb188808b71d2 *man/geometric.mean.Rd d0e84e368e3b4bc5265a2a88579d7ab6 *man/glb.algebraic.Rd 230a9780c02a0daffcc1475319013f46 *man/guttman.Rd 25fb1b56f8b6a902c0753a739d9e9894 *man/harmonic.mean.Rd 6d795d10e2820b6dd37f3ac04296ea72 *man/headtail.Rd 894fdb648cd187f77659f8e6217b223e *man/iclust.diagram.Rd 0c12ab235432f00043ff10f5b9692f5d *man/interp.median.Rd 3986e81d38fe685e1bf6b0be1cdb9857 *man/irt.fa.Rd 83eb4be3f0da80d9791b1d5c8409b709 *man/irt.item.diff.rasch.Rd 15a268ecfd40c5b1dfb1233781582006 *man/irt.person.rasch.Rd e3183e3a1fcc6f4e344ca71e45d6a002 *man/irt.responses.Rd 6e11d348cc8231d23c918531d9e155c4 *man/kaiser.Rd af37e1f303b0cf083bb39648f0d332fb *man/kappa.Rd 875fcfcbbc2f36d9ab434351fe257acc *man/lmCor.Rd 0597e0e81ea96be08cfaa9c9201a5a86 *man/logistic.Rd 254ead3db8eadbd5dbab6776e5875821 *man/lowerUpper.Rd 6edd3fadfaa8d51e3c39b701c74ffd99 *man/make.keys.Rd db77c28de2728c6d8d4ee0bcc39afced *man/manhattan.Rd 78ddd3301587bad307199d65531f0f2c *man/mat.sort.Rd 42bd24ab4c5723382fbd3e5715edcc06 *man/matrix.addition.Rd 8575b4528a69695deaefd0dd942c1dc5 *man/mediate.Rd ca9c27c60bafdf4b13876b50a5df779e *man/misc.Rd d584d999c16e1460b0781bf39d85b114 *man/mixed.cor.Rd 5b2622cfeec3c373b0313e05266ce539 *man/mssd.Rd 538b26dbcdac37da60a81717dbf9a96d *man/multi.hist.Rd 0bacc4965b69eabdc06d5eb6cc7c011b *man/multilevel.reliability.Rd a6d8dc9505dfb821926614c1dfd6b7c7 *man/omega.Rd 97e4db03fc417087e64ac73f5328b2f6 *man/omega.graph.Rd 2c7abbd08f95ab81caabe590ee75cd8b *man/outlier.Rd 95585a45807d1e081ba500dbcd93b8c4 *man/p.rep.Rd 26f882b454a5292aa1daed00aae36576 *man/paired.r.Rd 7123ae05e438306a8b089c11cc8a2773 *man/pairs.panels.Rd 7b4ba43deea17ae9aa131a7cc7f331fc *man/parcels.Rd 43499c89b7422fbb926fc9e1285fbf62 *man/partial.r.Rd 52846574053a12277f89f4a949413ac5 *man/phi.Rd 6a3b7dbed8aa1986d3ca76a7920043e5 *man/phi.demo.Rd dbcfd2f0fcb58bf03cdf17f3a07cd92d *man/phi2poly.Rd 251f2fda7b32ec98f5b030549c726d67 *man/plot.psych.Rd ff777922a14dda0f9dd64eb48de0990a *man/polar.Rd 639699b328f2703b12b1daa6c608652f *man/polychor.matrix.Rd c1bb5eb7673788c773007cec24962ca2 *man/predict.psych.Rd f503798d2bdccbdf83a4efa2c3361ca9 *man/predicted.validity.Rd 33849866ffde2dc9b993a641a8572298 *man/principal.Rd 0b372c62dafe8bf77d0427262f8a3eaa *man/print.psych.Rd ac1680c210a4135fd36dfeb2a382599c *man/r.test.Rd 07bd1a37a4c44dd170bf9290a3cb8fd9 *man/range.correction.Rd a79ed326797e83b29efd1ddba1b1ff99 *man/reliability.Rd f99ff7d11e369be1804bb523c32bfacf *man/rescale.Rd 6a26c7d9a5395b3b88432968a0e2e060 *man/residuals.psych.Rd be47add74e88900e4b798fff0c0752c7 *man/reverse.code.Rd 8a0bd3874bda78ad6a7f125a12abb2cd *man/sat.act.Rd 2da775ad361fe3e94063c8e0280ec817 *man/scaling.fits.Rd 9c31a65e07674b0247243df216db5d9a *man/scatter.hist.Rd 849634ebd3efb52e29ef044cb3e5b362 *man/schmid.Rd bb2a36477041a966a1aec50359978fda *man/score.alpha.Rd e9bac60c40ad2f59155974823158962c *man/score.irt.Rd e43e5b639e3fb76596e45c17e2197f26 *man/score.items.Rd dd42384f95aa37aa68585c6fc2401fbc *man/score.multiple.choice.Rd a9da94756c635c92a3411aeaa77e3d72 *man/scoreWtd.Rd 255b057096e6c440720f57e1c78e2e6c *man/scrub.Rd 2fdd1df8bd8932cfc8495397bf13d5fd *man/sim.Rd 10b09f0486026c35c86d3eb4414d4b7f *man/sim.VSS.Rd 9d6b64a08e47d559040c81556ae17590 *man/sim.anova.Rd 6cc6a07221580e45a4be136ef503204c *man/sim.congeneric.Rd d552db7a46e29a675c34937ac5613f66 *man/sim.hierarchical.Rd 199144290ca9374438e293374a3eafa1 *man/sim.irt.Rd 177f72776145ed7599eb000eed310228 *man/sim.item.Rd 64e51ad0753b5f6ac20f1ece95e7e688 *man/sim.multilevel.Rd 53a09a24aeca2d337909c06b405844b6 *man/sim.omega.Rd af1c619ce06b263e9afdebfa25008bc9 *man/sim.structural.Rd 89826aa3f39a3926aaf8bdca71464252 *man/simulation.circ.Rd 03d8b2964109b0fd7adf7111e7a0a58b *man/skew.Rd b5a0d72fb96af7bd35e8484d1ded7221 *man/smc.Rd 6c2d029ec4f357268db5d603d5851072 *man/spider.Rd 614721e6f6b87685cc0a3ad4e86f8aef *man/statsBy.Rd 6600e5c4683fd322a707a18140699873 *man/structure.diagram.Rd 4d3c9ec5c90c85dec16ef57fd26acdcc *man/structure.list.Rd ece56f42c8dad26b41dfb40f2bb41cb7 *man/super.matrix.Rd 5413a4147795350964cccebe92fd396c *man/table2matrix.Rd 65b03406ddf1ba8ab8209ed4c3f07929 *man/tal_or.Rd d2880cbebca047c3f4af52c02e83af57 *man/test.irt.Rd d5726f5e448135f4b8bbc733b7b947b8 *man/test.psych.Rd fdeb15df1f971f0612e905cae8a25ddc *man/testRetest.Rd 113cc0ddd8a1e845e885c1f9f9d1fb36 *man/tetrachor.Rd 7a8df2ea6f507696eaff8b7ab1f43bf4 *man/thurstone.Rd 9eec50d1cd2208bce401da2c58c29d35 *man/tr.Rd 22061c093ec806025481743c8762a920 *man/unidim.Rd a97beb62973191f13316725510371abc *man/winsor.Rd ccb9366f57062e9ea9346f0bff4f1782 *man/withinBetween.Rd 9bdd90bf72353c40bb4ce0fb15c46477 *tests/testthat/ReadMe.text 14275f989cfa286e8916767a21082790 *tests/testthat/test_ICC.R 87f3f586c433c64c35e9fb01c6b9f102 *tests/testthat/test_corPlot.r cbbb798e3dfd885bb00b2b822b4612bf *tests/testthat/test_fa.r d0d7e48cfab3a719fe9bd68445a0ad9a *tests/testthat/test_lmCor.r 814f232b38a8b4be8c4ef4cc61aaf199 *tests/testthat/test_lowercor.r a9282ddf0790a281b08c1d0d4c68f14b *tests/testthat/test_mediate.r 0154ffd9fe42d1297d94f8cdbaaf550f *tests/testthat/test_omega.r 83f9723597f6853156e32f25a1895868 *tests/testthat/test_splitHalf.r a5bb8efae03df2abe8032979f40fc9aa *tests/testthat/test_tetrachoric..r 8bf9d8c6019518573c9584228267203a *vignettes/scoring.Rnw psych/inst/0000755000176200001440000000000014551520565012365 5ustar liggesuserspsych/inst/doc/0000755000176200001440000000000014551520564013131 5ustar liggesuserspsych/inst/doc/scoring.Rnw0000644000176200001440000007744114550614671015304 0ustar liggesusers% \VignetteIndexEntry{Scoring scales with psych} % \VignettePackage{psych} % \VignetteKeywords{multivariate} % \VignetteKeyword{models} % \VignetteKeyword{Hplot} %\VignetteDepends{psych} %\documentclass[doc]{apa} %% \VignetteEngine{knitr::knitr} %\VignetteEncoding{UTF-8} \documentclass[11pt]{article} %\documentclass[11pt]{amsart} \usepackage{geometry} % See geometry.pdf to learn the layout options. There are lots. \geometry{letterpaper} % ... or a4paper or a5paper or ... %\geometry{landscape} % Activate for for rotated page geometry \usepackage[parfill]{parskip} % Activate to begin paragraphs with an empty line rather than an indent \usepackage{graphicx} \usepackage{amssymb} \usepackage{epstopdf} \usepackage{mathptmx} \usepackage{helvet} \usepackage{courier} \usepackage{epstopdf} \usepackage{makeidx} % allows index generation \usepackage[authoryear,round]{natbib} \usepackage{gensymb} %\usepackage{longtable} %\usepackage{geometry} \usepackage{amssymb} \usepackage{amsmath} %\DeclareGraphicsRule{.tif}{png}{.png}{`convert #1 `dirname #1`/`basename #1 .tif`.png} %\usepackage{Sweave} %\usepackage{/Volumes/'Macintosh HD'/Library/Frameworks/R.framework/Versions/2.13/Resources/share/texmf/tex/latex/Sweave} %\usepackage[ae]{Rd} %\usepackage[usenames]{color} %\usepackage{setspace} \usepackage{fancyvrb} % to allow us to define Sinput \bibstyle{apacite} \bibliographystyle{apa} %this one plus author year seems to work? %\usepackage{hyperref} \usepackage[colorlinks=true,citecolor=blue]{hyperref} %this makes reference links hyperlinks in pdf! \DeclareGraphicsRule{.tif}{png}{.png}{`convert #1 `dirname #1`/`basename #1 .tif`.png} \usepackage{multicol} % used for the two-column index \usepackage[bottom]{footmisc}% places footnotes at page bottom \let\proglang=\textsf \newcommand{\R}{\proglang{R}} %\newcommand{\pkg}[1]{{\normalfont\fontseries{b}\selectfont #1}} \newcommand{\Rfunction}[1]{{\texttt{#1}}} \newcommand{\fun}[1]{{\texttt{#1}\index{#1}\index{R function!#1}}} \newcommand{\pfun}[1]{{\texttt{#1}\index{#1}\index{R function!#1}\index{R function!psych package!#1}}}\newcommand{\Rc}[1]{{\texttt{#1}}} %R command same as Robject \newcommand{\Robject}[1]{{\texttt{#1}}} \newcommand{\Rpkg}[1]{{\textit{#1}\index{#1}\index{R package!#1}}} %different from pkg - which is better? \newcommand{\iemph}[1]{{\emph{#1}\index{#1}}} \newcommand{\wrc}[1]{\marginpar{\textcolor{blue}{#1}}} %bill's comments \newcommand{\wra}[1]{\textcolor{blue}{#1}} %bill's comments \newcommand{\ve}[1]{{\textbf{#1}}} %trying to get a vector command \usepackage{fancyvrb} %this allows fancy boxes \fvset{fontfamily=courier} \DefineVerbatimEnvironment{Routput}{Verbatim} %{fontsize=\scriptsize, xleftmargin=0.6cm} {fontseries=b,fontsize=\scriptsize, xleftmargin=0.1cm} \DefineVerbatimEnvironment{Soutput}{Verbatim} %{fontsize=\scriptsize, xleftmargin=0.6cm} {fontseries=b,fontsize=\scriptsize, xleftmargin=0.1cm} \DefineVerbatimEnvironment{Binput}{Verbatim} {fontseries=b, fontsize=\scriptsize,frame=single, label=\fbox{lavaan model syntax}, framesep=2mm} %\DefineShortVerb{\!} %%% generates error! %change the definition of Sinput from Sweave \DefineVerbatimEnvironment{Sinput}{Verbatim}{fontseries=b, fontsize=\scriptsize, frame=single, label=\fbox{R code},xleftmargin=0pt, framesep=1mm} \DefineVerbatimEnvironment{Rinput}{Verbatim} %{fontsize=\scriptsize, frame=single, label=\fbox{R code}, framesep=1mm} {fontseries=b, fontsize=\scriptsize, frame=single, label=\fbox{R code},xleftmargin=0pt, framesep=1mm} \DefineVerbatimEnvironment{Link}{Verbatim} {fontseries=b, fontsize=\small, formatcom=\color{darkgreen}, xleftmargin=1.0cm} \DefineVerbatimEnvironment{Toutput}{Verbatim} {fontseries=b,fontsize=\tiny, xleftmargin=0.1cm} \DefineVerbatimEnvironment{rinput}{Verbatim} {fontseries=b, fontsize=\tiny, frame=single, label=\fbox{R code}, framesep=1mm} \newcommand{\citeti}[1]{\begin{tiny}\citep{#1}\end{tiny}} \newcommand{\light}[1]{\textcolor{gray}{#1}} \newcommand{\vect}[1]{\boldsymbol{#1}} \let\vec\vect \makeindex % used for the subject index \title{Using \R{} to score personality scales\footnote{Part of a set of tutorials for the psych package.}} \author{William Revelle\\Northwestern University} %the following works only with apaclass %\affiliation{Northwestern University } %\acknowledgements{\\ contact: William Revelle revelle@northwestern.edu \\ %Version of \today{} %} % % %\shorttitle{Scoring scales} %\rightheader{Scoring scales using \pfun{psych}} %\leftheader{PMC Lab} \begin{document} \maketitle \abstract{The \Rpkg{psych} package \citep{psych} was developed to perform most basic psychometric functions using \R{} \citep{R} A common problem is the need to take a set of items (e.g., a questionnaire) and score one or more scales on that questionnaire. Scores for subsequent analysis, reliabilities and intercorrelations are easily done using the \pfun{scoreItems} function.} \tableofcontents \section{Overview of this and related documents} To do basic and advanced personality and psychological research using \R{} is not as complicated as some think. This is one of a set of ``How To'' to do various things using \R{} \citep{R}, particularly using the \Rpkg{psych} \citep{psych} package. The current list of How To's includes: \begin{enumerate} \item An \href{https://personality-project.org/r/psych/intro.pdf}{introduction} (vignette) of the \Rpkg{psych} package \item An \href{https://personality-project.org/r/psych/overview.pdf}{overview} (vignette) of the \Rpkg{psych} package \item \href{https://personality-project.org/r/psych/HowTo/getting_started.pdf}{Installing} \R{} and some useful packages \item Using \R{} and the \Rpkg{psych} package to find \href{https://personality-project.org/r/psych/HowTo/omega.pdf}{$omega_h$} and $\omega_t$. \item Using \R{} and the \Rpkg{psych} for \href{https://personality-project.org/r/psych/HowTo/factor.pdf}{factor analysis} and principal components analysis. \item Using the \pfun{scoreItems} function to find \href{https://personality-project.org/r/psych/HowTo/scoring.pdf}{scale scores and scale statistics} (this document). \item Using \pfun{mediate} and \pfun{lmCor} to do \href{https://personality-project.org/r/psych/HowTo/mediation.pdf}{mediation, moderation and regression analysis} \end{enumerate} By following these simple guides, you soon will be able to do such things as find scale scores by issuing just five lines of code:\\ \begin{Rinput} library(psych) #necessary whenever you want to run functions in the psych package my.data <- read.clipboard() my.keys <- make.keys(my.data,list(scale1 = c(1,4,5),scale2 = c(2,3,6)) #etc my.scales <- scoreItems(my.keys,my.data) my.scales \end{Rinput} The resulting output will be both graphical and textual. This guide helps the naive \R{} user to issue those three lines. Be careful, for once you start using \R, you will want to do more. Suppose you have given a questionnaire with some items (n) to some participants (N). You would like to create scale scores for each person on k different scales. This may be done using the \Rpkg{psych} package in \R{}. The following assumes that you have installed \R{} and downloaded the \Rpkg{psych} package. \section{Overview for the impatient} Remember, before using \Rpkg{psych} and the \Rpkg{psychTools} packages you must make them active:\\ <>= library(psych) library(psychTools) @ Then \begin{enumerate} \item Enter the data into a spreadsheet (Excel or Numbers) or a text file using a text editor (Word, Pages, BBEdit). The first line of the file should include names for the variables (e.g., Q1, Q2, ... Qn). \item Copy the data to the clipboard (using the normal copy command for your spreadsheet or word processor) or save it as .txt or .csv file. \item Read the data into \R{} using the read.clipboard command. (Depending upon your data file, this might need to be read.clipboard.csv (for comma separated data fields) or read.clipboard.tab (for tab separated data fields). \item Or, if you have a data file already that end in .sav, .text, .txt, .csv, .xpt, .rds, .Rds, .rda, or .RDATA, then just read it in directly using \pfun{read.file} (from \Rpkg{psychTools}).. \item Construct a set of scoring keys for the scales you want to score using the \pfun{make.keys} function. This is simply the item names or numbers that go into each scale. A negative sign implies that the item will be reverse scored. \item Use the \pfun{scoreItems} function to score the scales. \item Use the output for \pfun{scoreItems} for further analysis. \end{enumerate} \section{An example} Suppose we have 12 items for 200 subjects. The items represent 4 different scales: Positive Energetic Arousal (EAp), Negative Energetic Arousal (EAn), Tense Arousal (TAp) and negative Tense Arousal (TAn, also known as being relaxed). These four scales can also be thought of a forming two higher order constructs, Energetic Arousal (EA) and Tense Arousal (TA). EA is just EAp - EAn, and similarly TA is just TAp - TAn. \subsection{Getting the data} There are, of course, many ways to enter data into \R. \subsubsection{Reading from a local file} Reading from a local file using \fun{read.table} is perhaps the most common procedure. You first need to find the file and then read it. This can be done with the \fun{file.choose} and \fun{read.table} functions. \fun{file.choose} opens a search window on your system just like any open file command does. It doesn't actually read the file, it just finds the file. The read command is also necessary. \\ \begin{Rinput} file.name <- file.choose() my.data <- read.table(file.name) \end{Rinput} Even easier is to use the \pfun{read.file} function which combines the \fun{file.chooose} and \fun{read.table} functions into one function. In addition, \pfun{read.file} will read normal text (txt) files, comma separated files (csv), SPSS (sav) files as well as files saved by \R (rds) files. By default, it assumes that the first line of the file has header information (variable names). \pfun{read.file} is included in the \Rpkg{psychTools} package which needs to be installed (once) and made active using the \pfun{library} function. \\ \begin{Rinput} my.data <- read.file() #locate the file to read using your normal system. \end{Rinput} \subsection{Reading from a remote file} To read from a file saved on a remote server, you just need to specify the file name and then read it. By using the \pfun{read.file} function, you can read a variety of file types (e.g., text, txt, csv, sav, rds) from the remote server. e.g., to read the file \pfun{https://personality-project.org/r/psych/HowTo/scoring.tutorial/small.msq.txt} we just ~\ <<>>= file.name <- "https://personality-project.org/r/psych/HowTo/scoring.tutorial/small.msq.txt" my.data <- read.file(file.name) @ \subsubsection{Read from the clipboard} Many users find it more convenient to enter their data in a text editor or spreadsheet program and then just copy and paste into \R{}. The \pfun{read.clipboard} set of functions are perhaps more user friendly. These functions are included in the \Rpkg{psychTools} package: \begin{description} \item [\pfun{read.clipboard}] is the base function for reading data from the clipboard. \item [\pfun{read.clipboard.csv}] for reading text that is comma delimited. \item [\pfun{read.clipboard.tab}] for reading text that is tab delimited (e.g., copied directly from an Excel file). \item [\pfun{read.clipboard.lower}] for reading input of a lower triangular matrix with or without a diagonal. The resulting object is a square matrix. \item [\pfun{read.clipboard.upper}] for reading input of an upper triangular matrix. \item[\pfun{read.clipboard.fwf}] for reading in fixed width fields (some very old data sets) \end{description} For example, given a data set copied to the clipboard from a spreadsheet, just enter the command.\\ \begin{Rinput} my.data <- read.clipboard() #note the parentheses \end{Rinput} This will work if every data field has a value and even missing data are given some values (e.g., NA or -999). If the data were entered in a spreadsheet and the missing values were just empty cells, then the data should be read in as a tab delimited or by using the \pfun{read.clipboard.tab} function. \begin{Rinput} my.tab.data <- read.clipboard.tab() #This is for data from a spreadsheet \end{Rinput} For the case of data in fixed width fields (some old data sets tend to have this format), copy to the clipboard and then specify the width of each field (in the example below, the first variable is 5 columns, the second is 2 columns, the next 5 are 1 column the last 4 are 3 columns). \begin{Rinput} my.data <- read.clipboard.fwf(widths=c(5,2,rep(1,5),rep(3,4)) \end{Rinput} \subsubsection{An example data set} Consider the data stored at \href{https://personality-project.org/r/psych/HowTo/scoring.tutorial/small.msq.txt}{a remote data location}. (This is the same file that we read directly above.) Open the file in your browser and then select all. Read them into the clipboard and go. (These data are selected variables from the first 200 cases from the \pfun{msqR} data set in the \Rpkg{psychTools} package). Once you have read the data, it useful to see how many cases and how many variables you have \Rfunction{dim} and to find some basic descriptive statistics. \\ \begin{Rinput} my.data <- read.clipboard() headTail(my.data) dim(my.data) describe(my.data) \end{Rinput} \begin{Routput} > headTail(my.data) active alert aroused sleepy tired drowsy anxious jittery nervous calm relaxed at.ease 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 0 1 1 1 0 0 0 1 1 1 3 1 0 0 0 1 0 0 0 0 1 2 2 4 1 1 1 1 1 1 1 3 2 1 2 1 ... ... ... ... ... ... ... ... ... ... ... ... ... 197 1 1 0 1 2 1 0 0 0 1 1 1 198 2 2 0 0 1 0 1 0 0 2 3 3 199 1 3 0 1 0 1 0 1 0 3 3 3 200 1 2 0 1 1 0 0 0 0 2 3 3 > dim(my.data) [1] 200 12 \end{Routput} ~\ <<>>= fn <- "https://personality-project.org/r/psych/HowTo/scoring.tutorial/small.msq" my.data <- read.file(fn, filetype="txt") #because the suffix is not a standard one, we need to specify the file type dim(my.data) #same as before headTail(my.data) #same as before describe(my.data) @ \subsection{Reading data from a Qualtrics data set} If you have used Qualtrics to collect your data, you can export the data as a csv data file. Unfortunately, this file is poorly organized and has one too many header lines. You can open the file using a spreadsheet program (e.g. Excel) and then change the line above the data to be item labels (e.g., Q1, Q2, ....). Then select that line and all the lines of data that you want to read, and use the \pfun{read.clipboard.tab} function (see above). \section{Scoring scales: an example} To score particular items on particular scales, we must create a set of \emph{scoring keys}. These simply tell us which items go on which scales. Note that we can have scales with overlapping items. There are several ways of creating keys. Probably the most intuitive is just to make up a list of keys. For example, make up keys to score Energetic Arousal, Activated Arousal, Deactivated Arousal ... We can do this by specifying the names of the items or their location. For demonstration purposes, we do this both ways. ~\ <>= my.keys <- list(EA= c("active","alert","aroused", "-sleepy","-tired", "-drowsy"), TA = c("anxious","jittery","nervous","-calm", "-relaxed", "-at.ease"), EAp = c("active","alert","aroused"), EAn = c("sleepy","tired", "drowsy"), TAp = c("anxious","jittery","nervous"), TAn = c("calm", "relaxed", "at.ease") ) another.keys.list <- list(EA=c(1:3,-4,-5,-6),TA=c(7:9,-10,-11,-12), EAp =1:3,EAn=4:6,TAp =7:9,TAn=10:12) @ \begin{tiny} Earlier versions of \pfun{scoreItems} required a \pfun{keys} matrix created by \pfun{make.keys}. This is no longer necessary. Two things to note. The number of variables is the total number of variables (columns) in the data file. You do not need to include all of these items in the scoring keys, but you need to say how many there are. For the keys, items are scored either +1, -1 or 0 (not scored). Just specify the items to score and their direction. What is actually done internally in the \pfun{scoreItem} function is the keys.list are converted to a matrix of -1s, 0s, and 1s to represent the scoring keys and this multiplied by the data. \end{tiny} % % % %\begin{Rinput} %my.keys <- make.keys(my.data,list(EA=c(1:3,-4,-5,-6),TA=c(7:9,-10,-11,-12), % EAp =1:3,EAn=4:6,TAp =7:9,TAn=10:12)) %\end{Rinput} %These keys are shown in Table~\ref{tab:keys}. % %\begin{table}[htpb]\caption{Scoring keys made by using the \pfun{make.keys} function. } %\begin{center} %\begin{scriptsize} %\begin{tabular} {l r r r r r r } % \multicolumn{ 6 }{l}{ } \cr % \hline Variable & EA & TA & EAp & EAn & TAp & TAn \cr % \hline %active & 1 & 0 & 1 & 0 & 0 & 0 \cr % alert & 1 & 0 & 1 & 0 & 0 & 0 \cr % aroused & 1 & 0 & 1 & 0 & 0 & 0 \cr % sleepy & -1 & 0 & 0 & 1 & 0 & 0 \cr % tired & -1 & 0 & 0 & 1 & 0 & 0 \cr % drowsy & -1 & 0 & 0 & 1 & 0 & 0 \cr % anxious & 0 & 1 & 0 & 0 & 1 & 0 \cr % jittery & 0 & 1 & 0 & 0 & 1 & 0 \cr % nervous & 0 & 1 & 0 & 0 & 1 & 0 \cr % calm & 0 & -1 & 0 & 0 & 0 & 1 \cr % relaxed & 0 & -1 & 0 & 0 & 0 & 1 \cr % at.ease & 0 & -1 & 0 & 0 & 0 & 1 \cr % \hline %\end{tabular} %\end{scriptsize} %\end{center} %\label{tab:keys} %\end{table} % my.scales <- scoreItems(my.keys.list,my.data) Now, we use those keys to score the data using \pfun{scoreItems}:\\ These three commands allow you to score the scales, find various descriptive statistics (such as coefficient $\alpha$), the scale intercorrelations, and also to get the scale scores. <>= my.scales <- scoreItems(my.keys,my.data) my.scales #show the output my.scores <- my.scales$scores #the actual scores are saved in the scores object @ %\begin{Routput} % %> my.scales <- scoreItems(my.keys.list,my.data) %> my.scales #show the output %Call: scoreItems(keys = my.keys, items = my.data) % %(Unstandardized) Alpha: % EA TA EAp EAn TAp TAn %alpha 0.84 0.74 0.72 0.91 0.66 0.8 % %Standard errors of unstandardized Alpha: % EA TA EAp EAn TAp TAn %ASE 0.035 0.044 0.071 0.052 0.077 0.063 % %Average item correlation: % EA TA EAp EAn TAp TAn %average.r 0.46 0.33 0.46 0.76 0.39 0.58 % % Guttman 6* reliability: % EA TA EAp EAn TAp TAn %Lambda.6 0.88 0.79 0.71 0.88 0.61 0.77 % %Signal/Noise based upon av.r : % EA TA EAp EAn TAp TAn %Signal/Noise 5.2 2.9 2.6 9.7 1.9 4.1 % %Scale intercorrelations corrected for attenuation % raw correlations below the diagonal, alpha on the diagonal % corrected correlations above the diagonal: % EA TA EAp EAn TAp TAn %EA 0.8387 -0.30 1.00 -1.06 -0.0045 0.38 %TA -0.2357 0.74 -0.26 0.26 1.0011 -1.17 %EAp 0.7819 -0.19 0.72 -0.59 0.2350 0.45 %EAn -0.9208 0.21 -0.48 0.91 0.1373 -0.26 %TAp -0.0033 0.70 0.16 0.11 0.6574 -0.45 %TAn 0.3100 -0.90 0.35 -0.22 -0.3272 0.80 % % In order to see the item by scale loadings and frequency counts of the data % print with the short option = FALSE % %\end{Routput} Two things to notice about this output is a) the message about how to get more information (item by scale correlations and frequency counts) and b) that the correlation matrix between the six scales has the raw correlations below the diagonal, alpha reliabilities on the diagonal, and correlations adjusted for reliability above the diagonal. Because EAp and EAn are both part of EA, they correlate with the total more than would be expected given their reliability. Hence the impossible values greater than $|1.0 |$. \subsection{Long output}To get the scale correlations corrected for item overlap and scale reliability, we print the object that we found, but ask for long output.\\ <>= print(my.scales,short=FALSE) @ %\begin{Routput} %> print(my.scales,short=FALSE) %Call: scoreItems(keys = my.keys, items = my.data) % %(Unstandardized) Alpha: % EA TA EAp EAn TAp TAn %alpha 0.84 0.74 0.72 0.91 0.66 0.8 % %Standard errors of unstandardized Alpha: % EA TA EAp EAn TAp TAn %ASE 0.035 0.044 0.071 0.052 0.077 0.063 % %Average item correlation: % EA TA EAp EAn TAp TAn %average.r 0.46 0.33 0.46 0.76 0.39 0.58 % % Guttman 6* reliability: % EA TA EAp EAn TAp TAn %Lambda.6 0.88 0.79 0.71 0.88 0.61 0.77 % %Signal/Noise based upon av.r : % EA TA EAp EAn TAp TAn %Signal/Noise 5.2 2.9 2.6 9.7 1.9 4.1 % %Scale intercorrelations corrected for attenuation % raw correlations below the diagonal, alpha on the diagonal % corrected correlations above the diagonal: % EA TA EAp EAn TAp TAn %EA 0.8387 -0.30 1.00 -1.06 -0.0045 0.38 %TA -0.2357 0.74 -0.26 0.26 1.0011 -1.17 %EAp 0.7819 -0.19 0.72 -0.59 0.2350 0.45 %EAn -0.9208 0.21 -0.48 0.91 0.1373 -0.26 %TAp -0.0033 0.70 0.16 0.11 0.6574 -0.45 %TAn 0.3100 -0.90 0.35 -0.22 -0.3272 0.80 % %Item by scale correlations: % corrected for item overlap and scale reliability % EA TA EAp EAn TAp TAn %active 0.64 -0.25 0.78 -0.47 0.10 0.39 %alert 0.58 -0.26 0.63 -0.47 0.08 0.39 %aroused 0.44 0.04 0.62 -0.27 0.36 0.14 %sleepy -0.85 0.22 -0.55 0.89 0.12 -0.23 %tired -0.82 0.29 -0.55 0.85 0.16 -0.30 %drowsy -0.78 0.16 -0.46 0.84 0.09 -0.17 %anxious -0.11 0.34 0.00 0.15 0.50 -0.19 %jittery 0.03 0.48 0.21 0.07 0.62 -0.31 %nervous 0.05 0.53 0.22 0.05 0.68 -0.35 %calm 0.11 -0.65 0.24 -0.02 -0.35 0.68 %relaxed 0.33 -0.71 0.34 -0.28 -0.37 0.76 %at.ease 0.39 -0.72 0.47 -0.29 -0.34 0.79 % %Non missing response frequency for each item % 0 1 2 3 miss %active 0.49 0.35 0.13 0.04 0.01 %alert 0.37 0.46 0.16 0.02 0.02 %aroused 0.70 0.23 0.07 0.01 0.01 %sleepy 0.13 0.30 0.25 0.33 0.01 %tired 0.12 0.23 0.33 0.33 0.01 %drowsy 0.17 0.30 0.27 0.25 0.01 %anxious 0.68 0.25 0.04 0.03 0.50 %jittery 0.69 0.26 0.04 0.01 0.01 %nervous 0.78 0.17 0.04 0.02 0.01 %calm 0.09 0.37 0.33 0.21 0.01 %relaxed 0.09 0.35 0.37 0.20 0.01 %at.ease 0.13 0.38 0.32 0.17 0.01> % \end{Routput} \subsection{Correcting for overlapping items across scales} The \pfun{scoreOverlap} function will correct for item overlap. In the case of overlapping keys, (items being scored on multiple scales), scoreOverlap will adjust for this overlap by replacing the overlapping covariances (which are variances when overlapping) with the corresponding best estimate of an item's ``true" variance using either the average correlation or the smc estimate for that item. This parallels the operation done when finding alpha reliability. This is similar to ideas suggested by \cite{cureton:66} and \cite{bashaw:anderson:67} but uses the smc or the average interitem correlation (default). \begin{Rinput} scales.ov <- scoreOverlap(my.keys,my.data) scales.ov \end{Rinput} \begin{Routput} scales Call: scoreOverlap(keys = my.keys, r = my.data) (Standardized) Alpha: EA TA EAp EAn TAp TAn 0.83 0.76 0.72 0.91 0.73 0.81 (Standardized) G6*: EA TA EAp EAn TAp TAn 0.66 0.61 0.72 0.88 0.69 0.77 Average item correlation: EA TA EAp EAn TAp TAn 0.45 0.34 0.47 0.76 0.48 0.58 Number of items: EA TA EAp EAn TAp TAn 6 6 3 3 3 3 Signal to Noise ratio based upon average r and n EA TA EAp EAn TAp TAn 5.0 3.1 2.6 9.8 2.7 4.1 Scale intercorrelations corrected for item overlap and attenuation adjusted for overlap correlations below the diagonal, alpha on the diagonal corrected correlations above the diagonal: EA TA EAp EAn TAp TAn EA 0.833 -0.211 0.88 -0.893 0.073 0.39 TA -0.168 0.758 -0.12 0.234 0.831 -0.85 EAp 0.684 -0.086 0.72 -0.579 0.286 0.44 EAn -0.776 0.194 -0.47 0.907 0.111 -0.26 TAp 0.057 0.619 0.21 0.091 0.733 -0.42 TAn 0.320 -0.661 0.34 -0.222 -0.320 0.81 In order to see the item by scale loadings and frequency counts of the data print with the short option = FALSE \end{Routput} \subsection{Get the actual scores for analysis.} Although we would probably not look at the raw scores, we can if we want by asking for the scores object which is part of the my.scales output. For printing purposes, we round them to two decimal places for compactness. We just look at first 10 cases.\\ <>= my.scores <- my.scales$scores headTail(round(my.scores,2) ) @ \subsection{The example, continued} Once you have the results, you should probably want to \pfun{describe} them and also show a graphic of the scatterplot using the \pfun{pairs.panels} function (Figure~\ref{fig.splom}). (Note that for the figure, we set the plot character to be '.' so that it makes a cleaner plot.)\\ ~\ <>= describe(my.scores) pairs.panels(my.scores,pch='.') @ ~\ <>= png('splom.png') pairs.panels(my.scores,pch='.') dev.off() @ \begin{figure}[htbp] \begin{center} \includegraphics{splom.png} \caption{A simple scatter plot matrix shows the histograms for the variables on the diagonal, the correlations above the diagonal, and the scatter plots below the diagonal. The best fitting loess regression is shown as the red line.} \label{fig.splom} \end{center} \end{figure} \section{Exploring a real data set} The 12 mood items for 200 subjects were taken from the much larger data set, \pfun{msq} in the \Rpkg{psych} package. That data set has 92 variables for 3896 subjects. We can repeat our analysis of EA and TA on that data set. This is a data set collected over about 10 years at the Personality, Motivation and Cognition laboratory at Northwestern and described by \cite{ari:97} and \cite{rafaeli:rev:06}. \subsection{Conventional reliability and scoring} First we get the data for the items that match our small example. Then we describe the data, and finally, find the 6 scales as we did before. Note that the colnames of our small sample are the items we want to pick from the larger set. Another way to choose some items is to use the \pfun{selectFromKeys} function.\\ <>= select <- colnames(my.data) #or select <- selectFromKeys(my.keys) small.msq <- msq[select] describe(small.msq) @ ~\ <>= msq.scales <- scoreItems(my.keys,small.msq) msq.scales #show the output @ % describe(small.msq) % vars n mean sd median trimmed mad min max range skew kurtosis se %active 1 3890 1.03 0.93 1 0.95 1.48 0 3 3 0.47 -0.76 0.01 %alert 2 3885 1.15 0.91 1 1.09 1.48 0 3 3 0.33 -0.76 0.01 %aroused 3 3890 0.71 0.85 0 0.59 0.00 0 3 3 0.95 -0.04 0.01 %sleepy 4 3880 1.25 1.05 1 1.18 1.48 0 3 3 0.40 -1.04 0.02 %tired 5 3886 1.39 1.04 1 1.36 1.48 0 3 3 0.22 -1.10 0.02 %drowsy 6 3884 1.16 1.03 1 1.08 1.48 0 3 3 0.46 -0.93 0.02 %anxious 7 2047 0.67 0.86 0 0.54 0.00 0 3 3 1.09 0.26 0.02 %jittery 8 3890 0.59 0.80 0 0.45 0.00 0 3 3 1.24 0.81 0.01 %nervous 9 3879 0.35 0.65 0 0.22 0.00 0 3 3 1.93 3.47 0.01 %calm 10 3814 1.55 0.92 2 1.56 1.48 0 3 3 -0.01 -0.83 0.01 %relaxed 11 3889 1.68 0.88 2 1.72 1.48 0 3 3 -0.17 -0.68 0.01 %at.ease 12 3879 1.59 0.92 2 1.61 1.48 0 3 3 -0.09 -0.83 0.01 % % %Call: scoreItems(keys = my.keys.list, items = small.msq) % %(Unstandardized) Alpha: % EA TA EAp EAn TAp TAn %alpha 0.87 0.75 0.81 0.93 0.64 0.8 % %Standard errors of unstandardized Alpha: % EA TA EAp EAn TAp TAn %ASE 0.0071 0.0099 0.014 0.011 0.018 0.014 % %Average item correlation: % EA TA EAp EAn TAp TAn %average.r 0.54 0.34 0.58 0.81 0.37 0.57 % % Guttman 6* reliability: % EA TA EAp EAn TAp TAn %Lambda.6 0.9 0.77 0.76 0.9 0.59 0.74 % %Signal/Noise based upon av.r : % EA TA EAp EAn TAp TAn %Signal/Noise 6.9 3 4.1 13 1.8 4 % %Scale intercorrelations corrected for attenuation % raw correlations below the diagonal, alpha on the diagonal % corrected correlations above the diagonal: % EA TA EAp EAn TAp TAn %EA 0.874 -0.0207 1.004 -1.006 0.218 0.168 %TA -0.017 0.7515 -0.011 0.024 1.096 -1.140 %EAp 0.842 -0.0084 0.806 -0.618 0.360 0.246 %EAn -0.906 0.0197 -0.534 0.927 -0.067 -0.076 %TAp 0.163 0.7590 0.258 -0.052 0.638 -0.512 %TAn 0.141 -0.8837 0.198 -0.065 -0.366 0.800 % % In order to see the item by scale loadings and frequency counts of the data % print with the short option = FALSE % % %\end{Routput} \subsection{Show the correction for overlap} <>= msq.scales.ov <- scoreOverlap(my.keys,small.msq) msq.scales.ov #show the output @ \section{Even more analysis: Factoring, clustering, and more tutorials} Far more analyses could be done with these data, but the basic scale scoring techniques is a start. Download the \href{https://cran.r-project.org/web/packages/psych/vignettes/overview.pdf}{vignette} for using \Rpkg{psych} for even more guidance. On a Mac, this is also available in the vignettes list in the help menu. In addition, look at the examples in the help for \pfun{scoreItems}. For examples and tutorials in how to do \emph{factor analysis} or to find \emph{coefficient omega} see the \href{https://personality-project.org/r/psych/HowTo/factor.pdf}{factor analysis tutorial} or the \href{:https//personality-project.org/r/psych/HowTo/omega.pdf}{omega tutorial}. \section{Session Information} <>= sessionInfo() @ \newpage %\bibliography{all} %\bibliography{../../../all} \begin{thebibliography}{} \bibitem[\protect\astroncite{Bashaw and Anderson~Jr}{1967}]{bashaw:anderson:67} Bashaw, W. and Anderson~Jr, H.~E. (1967). \newblock A correction for replicated error in correlation coefficients. \newblock {\em Psychometrika}, 32(4):435--441. \bibitem[\protect\astroncite{Cureton}{1966}]{cureton:66} Cureton, E. (1966). \newblock Corrected item-test correlations. \newblock {\em Psychometrika}, 31(1):93--96. \bibitem[\protect\astroncite{{R Core Team}}{2023}]{R} {R Core Team} (2023). \newblock {\em R: A Language and Environment for Statistical Computing}. \newblock R Foundation for Statistical Computing, Vienna, Austria. \bibitem[\protect\astroncite{Rafaeli and Revelle}{2006}]{rafaeli:rev:06} Rafaeli, E. and Revelle, W. (2006). \newblock A premature consensus: Are happiness and sadness truly opposite affects? \newblock {\em Motivation and Emotion}, 30(1):1--12. \bibitem[\protect\astroncite{Revelle}{2023}]{psych} Revelle, W. (2023). \newblock {\em psych: Procedures for Personality and Psychological Research}. \newblock Northwestern University, Evanston, https://CRAN.r-project.org/package=psych. \newblock R package version 2.3.5. \bibitem[\protect\astroncite{Revelle and Anderson}{1997}]{ari:97} Revelle, W. and Anderson, K.~J. (1997). \newblock Personality, motivation and cognitive performance. final report to the {Army} {Research} {Institute} on contract {MDA 903-93-K-0008}. \newblock Technical report, Northwestern University:. \end{thebibliography} \printindex \end{document} psych/inst/doc/scoring.R0000644000176200001440000000531614551520562014723 0ustar liggesusers## ----echo=TRUE---------------------------------------------------------------- library(psych) library(psychTools) ## ----------------------------------------------------------------------------- file.name <- "https://personality-project.org/r/psych/HowTo/scoring.tutorial/small.msq.txt" my.data <- read.file(file.name) ## ----------------------------------------------------------------------------- fn <- "https://personality-project.org/r/psych/HowTo/scoring.tutorial/small.msq" my.data <- read.file(fn, filetype="txt") #because the suffix is not a standard one, we need to specify the file type dim(my.data) #same as before headTail(my.data) #same as before describe(my.data) ## ----echo=TRUE---------------------------------------------------------------- my.keys <- list(EA= c("active","alert","aroused", "-sleepy","-tired", "-drowsy"), TA = c("anxious","jittery","nervous","-calm", "-relaxed", "-at.ease"), EAp = c("active","alert","aroused"), EAn = c("sleepy","tired", "drowsy"), TAp = c("anxious","jittery","nervous"), TAn = c("calm", "relaxed", "at.ease") ) another.keys.list <- list(EA=c(1:3,-4,-5,-6),TA=c(7:9,-10,-11,-12), EAp =1:3,EAn=4:6,TAp =7:9,TAn=10:12) ## ----echo=TRUE---------------------------------------------------------------- my.scales <- scoreItems(my.keys,my.data) my.scales #show the output my.scores <- my.scales$scores #the actual scores are saved in the scores object ## ----echo=TRUE---------------------------------------------------------------- print(my.scales,short=FALSE) ## ----echo=TRUE---------------------------------------------------------------- my.scores <- my.scales$scores headTail(round(my.scores,2) ) ## ----echo=TRUE---------------------------------------------------------------- describe(my.scores) pairs.panels(my.scores,pch='.') ## ----pairs, echo=FALSE-------------------------------------------------------- png('splom.png') pairs.panels(my.scores,pch='.') dev.off() ## ----echo=TRUE---------------------------------------------------------------- select <- colnames(my.data) #or select <- selectFromKeys(my.keys) small.msq <- msq[select] describe(small.msq) ## ----echo=TRUE---------------------------------------------------------------- msq.scales <- scoreItems(my.keys,small.msq) msq.scales #show the output ## ----echo=TRUE---------------------------------------------------------------- msq.scales.ov <- scoreOverlap(my.keys,small.msq) msq.scales.ov #show the output ## ----echo=TRUE---------------------------------------------------------------- sessionInfo() psych/inst/doc/scoring.pdf0000644000176200001440000052154114551520565015301 0ustar liggesusers%PDF-1.5 % 1 0 obj << /Type /ObjStm /Length 3781 /Filter /FlateDecode /N 81 /First 665 >> stream x[r8ҾwgJUlı=9_DȢ#Q9ISі2eKhth4 yfrYfc[ReeLJL*asY*ѹNY>L&t:&J dzшxMHWGRDԟ_/ > ,JgU W¡s1 iuUr9>b{׏e-g<ѭJw/+1]ZDݙ2ҡzLp`w@΁ۍ3N.2ࢆ؍RNGV%LݑPdZ>GxGNwQEɩh:WGёבYqtl#:R1TLb"S-kV ^H,jyh߾<~ϯIqȎ3p|I["}OzbXۂJKbW0x@eX5'Ay0:Ha?p' pKyz6 Cu `za ic?eiO]R벬>?xi?U ^ߐ2&(vboF)?aDQQ^gb iNtD야9/|ƿOPwI{`/O":<J b3G9ix;ҫ׽|}j'Q"ʠʛ\)M,k]ߠ{j>vA4ᳪWr 7*ւλvwӳ,{YJڴςE .Qy>/[9>&hFy]#wx$_S|KiOŁU޷tAxSI7 Zy"2BFLb&Mt s~)-OROkJ,*2R'JߺSq,E5P6tv7ܟ7\*$iDRK+M2Ze{SV'),aKpvwdμd!@pB$X:AO`Fd5?5#1&ִw72st8e[ț.qn 6$;x8S?7av%oZ;F%LK:q%]9@4I鎄W{[2dtb0)\sm oVFJ|& ]OrHwV,62[,Z^V> RdҥnDE*wHp:-{%u99#ĔT\"7"[r5o'RuQ$t̙%CoT9e$G 1m\?7~>zx:;XOj6_Wfn||xeSЅlnz/{s\5| eFZnUbtfPMPW vsn*,1}# 5pA\փxdV ]߬Ҏ.7W.64bzã(ΛXebŁetY~?nWE@܂yξ7X oIwo&.]&J7#]",aЩDz7]۵$~]2E_ZqąMte|uWqlRnk;e9ğnyQmNIT_B99Ӱ[14TBNQv}Wޓɜ5?Ƕ~U9Q_e|%|_=&!16! %/jâ={>~é7Rp]dV: pBXFQl;c <+ΊsD<5.3N™l-kvË#.C vTi-Zc7HM=Q%1:s{= Hendstream endobj 83 0 obj << /Subtype /XML /Type /Metadata /Length 1388 >> stream GPL Ghostscript 10.00.0 2024-01-16T09:32:04-06:00 2024-01-16T09:32:04-06:00 LaTeX with hyperref endstream endobj 84 0 obj << /Type /ObjStm /Length 2339 /Filter /FlateDecode /N 81 /First 718 >> stream xZoGC$uhC)6APK =\߾$ 0WoYRrPރz)4adÐS0J,¨!Uh!׎lSAAN9r <3t>҂yq b+S#HOpPxqf 2*)tV4)kP!lxY@-h%7#h.)h(9\L$T.pvc5 pVy:2O`QN&S#K VJf\$X'IK4l\cj8*Wʈ!IbUهlXur "rڇ0kIטX [֟BY݅q47!SOQDoI 8AAwaI;cXXzX<=iC\m'۷__]_vW7ow?:o|phpƏ[l`VzD1}&4!]eXiSG7 Lp:80BdJRܞɭb!i\g)38Qm p-xf䖍s,zd|˫.˫.\kH]^\>RZnYuaUI-Z8-3VU6ץI bl=-I$6qٍyfb3n hőWec;<4x?S,]Uv<1Wsx10$cހ0%C5,"[lT@KҺ%ÄKA uC1Iz,8jMAd9_:o$aЬ3 ~HIZF0#AgZY`e?b(dU-h@`;иqsKƴxhCGn֕ΏV{ u ǖ/2hEiOرqVUak1Z4m/mAup~iwsVzE1QC(2 &}V3Ul­r$˻.?w_xuOhe=\ y'endstream endobj 166 0 obj << /Type /ObjStm /Length 2663 /Filter /FlateDecode /N 80 /First 737 >> stream x[ێ}WcVwU-X,8l `2#'Y:>$%J&FZY6oSMd1r bx3!c?ybĖw^l<9Bg\Smr^J„Yb(dg(do a!C :YՈׇPЋ:O1]qF ˜ q{a8"&) Ee*D}/DRJ1Iyq& 1 a@&1{ITٌ& I5BbR:d'wf<{69@@br;9׹(>`M!HI;SNHޔ@?)AZ 3wH5XeTBQŢpP*xAgJ 5xQ3#'vYπ0 8$x΂gԙ JZpVd'5'VC 5fŌ١q7oޘk`7fo6;O9XQ;|euuz_oWǵjѤfܼY ):ԎK!آ9Ikyuqo۷U 6J :xϷ;{)Q}WYs}#ǝ_-,օ!n;7ŚZc:od8s9Li Ԫַڃ梦&tRkѪǹgb_c2vhe`-uoIz}'`'RgY/ qXZrpB$lNo'qB@ )b;9⁝{v^wjF R%[;.U. x,njպ~^=Ʒ[&DN\MzT0f]@xѨu휬+q$#i01 ?n\[#7ghF箤/K![[Me"Acx $}?d *|8c+FeȤ񳕆vM [npG]c'EN.mKcx;)y@4Y=0MJʣْ[\lۍP%CL6! [,r\aWٽ .I NVw6 uׇ߳1]@<=yx(|j:yxhճ*߷ ?,WhRv}cf7󻦼mb$[M8K*&o^?= Iepۂ>`UR6d'lP:X@&a2 :B [tjtip! vEW)l1LI4fo6ܭ͡^?=~6{֟xZ- vz7[͞?8{x\~;L~hA;FgHVW %++|a+eRm  sW?]' &)r覰 "6p?ݞ&d *fg<#-fq{^NFbd54:y[`u >Z}+" "p@Q_Cz:(EM֭#8.HnC[蛆ҍL7#% Z)ٌwIkĂ,% 0x;r+k\1vD^5JxĐ{Jt266hv L_] ZE!X:lH)e][ [7PxRw֪CWH!*xNڋ#[[Ztww[F2Rl(J\ +(Ә 61MdžA XHJ2o1"K*6TOMXW&F)!&ق.Ya_PS$FW /\8t*VMȤ F5*)˜&Ҁ>GT+u 2mSVM6OFPЂ@۽hR9FO(CM@Vƌ[9CmP+I(k-{;hm{h'-{;lm{ l+˦ .mz] ztthjtנWȞbu%Ή#+]AD7%X4(vW#T`.dlp\C%i<41m/Oշ6)niQ,;*-5[dBOY]dpŘ_to((`I 7kȥы iHWK/-,v@UAnXr AZ\[Rm.TJ0}ϻ4ox)vJI+iF{*&x)Ҽ +[g]24]KCN[@AaHeʸFB!*rvDuW-Y?Me$u;ZX:X+;#;oZ6_ʭ[hh >"[Zaz2?Ti>4{dm_i`endstream endobj 247 0 obj << /Filter /FlateDecode /Length 3420 >> stream xn|}XU"\ŴݢvfSŸ$VV'h?8NC,<;""fwtg?q6ۋ_Pja3[rqqV6˵Z^>r֙yR3_׆n sϹ`վjfC/E" mWz6͡\|Pm0(B~ϋ?8{/We3W_ W,7 PO}&n ݮ+04g"SLR,efZJ!b HeDEhP'}D~{u Q^yIPu3kkm'oaW"И'L ~WR_7PuYub:`XL"6B ϑn]׷`p_ZfnM۴j67ЃFEJeR,mXfo[4mmJXX8`+ ACvNcLS\)H vu_7xZw)eU] =[v-^rN5a$.G:ʄ*G-ޟ2+Ȳ?LvnbY<`L?l VvBNb$+ 0D RG^߲۪͡~+^!kP.Sg*2U6: ޜ. ZL}pކK0fN!RXӸZXL//յ:3B%יVXפEPSu21YB͡ GLY!'|r%&qeF! 0N= [ QyQs0yL6 nlYQPU@b7+PPrQy&5Cn^h4%. N*Ss,k{7 .(2B*G\BWûZP?׊k\XD-~YQdCqێ~xhE_;zA".ϐ;]=] Igg2eWrD|Ȋ fݾʱ6+hb"p@"HUnyZRX#&sW&ָ2˗+ZZ~d*ؿ=Z@cyFi c(9h?>@q 17s<] h<H\v]MiOBeƱ2[TH R"%$Ljo:I -m KIQA ׃so }W>rH@*x5+j񗶲u .H*hJWKoG"3zB K,V)YzoA +6i>aW;.Ģ mzI/`Hb2 s0/~w'ݾү } yX 8]mC;:ω?HLbl1@Wo^T26fN:/8$ &V yġzlÊUڋX?恳ڟОRqfxa4oʌmR~% S`^@pX$KF|v:?JFwA&3YWo.q "-R+(]M[ 9>I$d#Zj%SC -0+ M>|f0pӄ`#PESQ)NF"U?zC~px|05+&.里_ߓ=06d7^`К%5`(\݀=|3Shgwbbw=PsEsãr9G"v$f1j zN$!$idTm\pH{8RW(IWAioft! /h$BB+JX4Q8 6|PSJ"Q AAS_vUwWl*}w\on}v]xÍw1R!]3䧌^;U1J ڌh%^lfBI2H{ KQ` 捻l[ U`HpUdw64e[u5e3c `g2e60I4bdO3>9]6iՂ$^+:2V\>sz4?|biѤȤmNޜQa{Tn!@}#ݶq鈤£x;#of!2Lj3#^-oKh8;wv۵,!'>Y@|j57tPad*-O|S/s paNM˱*o%h)pP^v4d%=^*~?`2x4<%O&Z y{qMܮ)C ֊SsVr4nx]q7,Jz>5;y0 ib Fx{ ۷ An"6a@O( %)Ayqf&K"Hrh$| P}DE V̯x?٬ˮ)SVy/,-UQ cH:b?_ϡNukendstream endobj 248 0 obj << /Filter /FlateDecode /Length 3413 >> stream xZݏ۸@.֪!*+.>>hmZ-$9R^o(h_\/9,eosXx[.$.oC.-Pj2m+Zi8Pb[ 蛰ݜlMn6)'ĩ[HyQՏK*;={ޖJ+6 V$l`p(>MxhZssj#\tǶ,ݮ,(@ۿ-Jrk)Ǽڗ *[&lLoF UbCM$5J_vu~DySėǡ“l @f;v'~*ee^D&NǦ,>fxP +ƫYp]KO,-sUe Ԓh &@f4fR0/XE?UvhICc/g?DChAEQ8KFe&FvEnA&\z`B@a~+Vl p@T#GsF.J{9E?RK N$Q,o3UI_R}Dwaِ)76G)YLކb8_ 9^?(I'^U1y&>FOc|0é"vbO5rq ‚& &.!,u>TTCf!>x(BLeȱ;oV6'}噞t6a 83: ;fA8"r4qS@m#g~QR+HI]ұqDtnSnx>%eʪBDNc+F 2P|,%H CS髦j9RPU'ͪÑLɡ>38D_uq(!t/۸A4{|le9 teőa%)!F"bcEW+eHi-mW=Y@pUщ=58( dj}~ ÄRx?/Hl휟N_]d#&K2g/_@kFbUNm#ޅDKgs 7j ^!u:+D6ͩ_Ԡ#< I@l/n]bqn-2AkE]]nLOȹI]\kXH֐@]`Q`Vy-)'ؠzd4/`Z 3"a) hUF:0Hp˵.sT|0wWbDflG*Nј4骯)PV˛^F:J<}ZmsꨪpwfVA¸ dQgGݮ,5䑚rp5#OE _okRj&\"O/aY d"(I7 e9]w #>pSo*}y8#K[D4>Z %A`4VZMQuPnNX\ׄ%, '@5C(GrE *DB0Q.Lx)"2 s DWL^L+ ܯTD1|*ּPh վh . gS3 |;Rdzpdb.O2t"!qi*~.>"25+Yt@`-'kQɖN؀v%ಚ1Mtx< Y~x9wQܽTݫT&N~w̡(Id2d2Zr% Bc첉@(VlSfý#?T[*  M䛁g\Ғ6_tR~hPuRDC 88vpC""Ŧ(2qZ7MX`̔K@x|g˜bՄߨz;\֢b3rXAXKK‹gNSe~^lz8 w``X}&]䉶e&L3֐ )ʃ~v,mjv!vcS"?)-:ZKJOU E,T3EsGeH!FZ x fXEJKM`Ƒ'dCD `f۔ 9%\4=ZO~<]@) Sxh,mW]vjl.-ߑR茛dG%v]%pNRon-{|Z˟vgPC]&w/v[b]*~fa;)SٖͦAJR Lv:q1(0UTaʙRU8pF "2Y6$tS (##gh\`c:! q}ƹ׿Mp!=eC ~T:< 1j |pdsАnCL T1ލ}~A٥Pi^[$3 {"l̗t—3K,z,EśPPYf%u/ʢQ4p-uᛙx"N^l@TfM^n h&,6Z*HUMSF^2_ýY~q74q/HC&̌ԁk囫w(gߢ~rc')k]>^d; 0!oQ5ɝ5Wg_~ |֙WL¡BƒUٟT*#@c)5}o>(;|,KpذX7f>BZ]84^wA|`Z D@6<¸ZqCQA?0)@Cy}u"|@P1 2cք4'4|/&^#p&Fx0NL(T"YM* U N01R"*%т l,#j)pXv `׌n1ߏߤ7tvžk "?`K/ x!8G1_7伸 ·"5.Ƅ5@^&z4SBg@1e9@kf'ͲuP (3- =o$endstream endobj 249 0 obj << /Filter /FlateDecode /Length 3394 >> stream xrȵG}k|H3eB70fVYjF9@$$q$h[Ekl)okB$한_R4;V˫Δ**lywGLU vK?[n~RM d!w* mUTuڡl0hݱI{x6,dLy퉊T߯I,K#:EUL #* 4 jUD 0мBH"R4 LDg ȗ\L6/Y7xhjs@K/c$SڮϧPb׌8v}=%>`a@ZxMFn= bBn|al Gpj ҍ8g./`Ö*2ӚEYqq?ª7$NqP|{ŧzGL'"J6/?ר t}4G-KM*ӾЉw0!@:>Yc6bտ~0G,Va i Nn<" :9L0rG|9!-gFVq 🸒JF$مU-lj}9ݮn7ǧܬEw>\_ewݯOq<nmݶŶW}"Cpu.2+)0 .d iYlLbe4ge %a @$hS 1Ć@!aOC#^a&&A%gq -L0_2ݦ3RW|Bb+bcsWBèB5ovBCP!|8foBD7`| T4#`0z{I8ԐS@U+RCYb_+ɀ6Wݘ.iPc%^+ *XiL)hCC֫oI"f [("ȋCd.C9ѥɁ7 :L7k0u4~HO;yEN_t k՞֘ ˓bKL׷I_ 3@ @ uMv׫_[$.PR$A%r`˜@v0;nEKF]#\ rX W qOǴ̮Wù?6qia[4qs|G"f%$kHԿ7=,u@ e bbsxLT2HLOWťK@CurƓLg"~KL$j^2ǻřqNzq&`im3.KOk0&n`~YTJ'H{2D(p\Z|o<7uҪPIAhRŒ;ZKM1 "m:nrhm1} #bTv&hjB35dxЋa0n<6!h;"&@G。fcb$v@O_U`zVʼHڵקcó: Iv_IhհbbA0$a7켒J:%Pa<C>iP:01F5}oCP[ƒpյ3֟yoV.զS?.ԤR9ر́u?(8`ZI@&/7`rxˊ41 o rm])VD"qb >0\f<ʊqK |拋6 S>h 7f#n>{3m,4%&LO,R-E5AN"DߘjiⳡPU ﹾxcYYg?t~:E%_i iIǎ ˓z퐋֗B2;[8GipGNml/ua ^fBo1QA8bcd^蛖: >#Q"1?{^DHBH#&MF<.{ϯR \d?eOhǫ endstream endobj 250 0 obj << /Filter /FlateDecode /Length 1533 >> stream xXo6v_!z9"E: Cb3ZYJ%9M=RDj!zo_3FywwX1j Uow`ԨLȵ54r8-Lֹ5HRZ ڞ=+e!}43ȶ׫g[ʴ #@}mOݻ`2. tւyNS k\[H y哠f' >綯pخ. :/ra0*gBU s2pbrh%T j8wIt_MnTN Ԣt%#eׯ7pb|"W6Ӈ~C×}<L>*֪Ia"qAE|9vCWKS /@peQ#8'.N k XSzD`EX֮Y"R:iV 0~#LjPF%ڵQBLs- ,K pa'XNYA5 {N~.&'bɩ jP( B"곯R&j΀x+PGPYo}n)h $/yzGf<])FKX&΃lE|u#P^hBӁTP-g6"ji7G V,]Yɦmd)*6X\]>}iF|n[c>\55i+{ Ul>θ^dpwBx;߳<vMeXv|$,8ʟ^ yz {ʈzNR4~d8g9,CݩDspljѕwμ|8`Ee8GcY{GywGXzA0pnb=rѦ~%+̂R2 C;}޵uvCx`>#p|=@^(QpB6=F2ܺΎa Gُ_EU>O$~k0zlՏ.l jFR>pr׶]4i1f-1Vl nFo>~Qm6-,\ d1k '[(8׍ uոbؕϚ391Aj' kNc f.u- ,JFPEyEn[+lEMQ ݰ+W# BW{B{!¿5Fb'- Bgf $-hFE/PyF zw_ :|Qù)ߨkN0R34N±w -,ssv_yetWWwWmP^O3:|"{ۜ%D%sr2@%r)Lx\wPbendstream endobj 251 0 obj << /Filter /FlateDecode /Length 3748 >> stream x\ݓ۶Pk7i֝i4u2D) Eٹ (Sۇv$?- fqs.~STHoB`ȅzqzIr圣:ݺKqr\l!n6Lv\#ko_AzEGbvӮOjptc!IMOkI!B8ٟc*9vLw#m%ɱ#tpl1.{l/ LʹHJ`LQfbuJAQ1`WNnÒ#c(vŢk:kMnoJϭȢmA@-Gm;ΕuO ܶKІ]otQ %)md]{wG[ ^UYiEQ N`ؗJцv[n3=RHEzZ2N-I=Џ.П A8I0k΄5v=ȶzIt=vɱ-djMRZK~8͡>u,Ⱦ D?pS{N{MQkpޫT0 8j[umEf PlpKבj#/ݰtSП.4<_G:hv3&rN͒ܵqp aINJqR{ϘV fonɦ@o-ھ}s{TɡwsaD=+%}1b3Q~ݎ@'Ӝ9_`4tp_9 w?z+N&k٤>DKAW^!_[ zRaBp `*b@s!3b"l3?_ZuxIZmTQh h5 ?2$#4P!s Eۨƌ`FOXH$ hmv闏FЇҋUtj#}UcT7J*|T} NF 5~ 3ʑ L46%ۜ)놱^ 6{ݦ^75\P'J:fP4gC!SQM}ŔTZ~f $/ij*!rj=PU3*`! * :FԼ4ݓ[Nk :4at~O~؎c7T"CɨղfRj%+5m1S ZPLjupꨥ0j ݮ_>,ϯ}xV'dSIP~Puh=mTցl+*ѺkS UI>bQ4ٵXKH{n/{ޕ4 SfT8%V\:ȑ*vJ. :o)W㫎 "zhuA:P >of$_ /P>Ց>9sK[c%P.U$XIj郚 8t?(YU1AwC1d4egsN{|tƂub3[l UE_$3eP^r(&LQLEV9b&Px3Sx ~ K5R z\W\sD U}0HLngr-D!hTRH]8YŇl K3$Vwf@uv),cSTz  Dp=MWTe>۬d>gNVBDҝm'yB(TL]Y[+D;}fp4Pc@r ˰3Kv.9^H7ᘎ#쨷ͺ L?N՟A$Jg0t\f/1s%E{޷9wlt<̞TT4;4)70YS4:;u)t-a:Ȯ?x{89txl?,5 3W SFfQj+\`? }wpp`9}CƎtֵ?e:î҂B6ö}zd8]~xcYuUQYp1>9۹. qK€& JRPLnxf˶>F`NG#a\Ag Ur5+F7Ssg#X<`va:<׍gasFK 9+{PI7,51OM89Fy o6$??nB2vV<1iP41tPRc'l"ʼnǦ,YFUj-8BeTk% O]E͌?3&x0vd:t=lO'dAx2 z>2~)& LJa%t!0|dx< ҉j/%>S 7:gG4n&I ^v ,^A5'ŠlL1ǣ>% &d9gۮXpKP4 8 Žt@ڡcjTn6CGAsV' {,9uOq{|k>BDgQz) qW/7}o T6`i=W!/Ƅ+S0%RVxDxfDCاcyr,߿_k*GY0]|2&wۡKE`u/׎~J.._C! bo!g>a :YXacCכwCF>qlGfl*9|SSV߽ 8-} wPEur'`kEˁ4 B+\^o'>0-~ iyv|pěn(~>2oE5K6xtD,{S U(z9^L r?M_Rڒ)}q2'K}qc,8ҹG`ɗGfqL$uNcuLP.YJ7a' gI.۫"_%ydh3mfe8gɰPW.+aq;z}6efJ| [> p<>L7[S&K8(TbXYuVlaE[3wx~)Tm V&&":3b0IV<̅ s%܊p |-wMQ#^ofA;aϬn&?ɣR¹tH?z2!Bq\VP{bڮ> stream xXKoFWa@»bvd8ʢmN$RCRxVuɦV |0]]UU%~h2ÊfO+fCfgI3W8uyX,cL[x}XBH=T|--udϼ̐W(RCۮPM?*QIfT䡫rTvZO|[&$}D1%` ]͚,.o Pdx*1^Ab.p8CW-jx&uTeͷclV7qG2PqQ56 ҕ9SxlK=[Bٷ9RE|lrsY,C=Ǩ bb ϏS?TAǣvAz}X6gx,n驊&͇+"Wooc(n x{$+hm;=x`{K|E'eʚYO).ؕm8'= Ӑ_l-HkPO 9>CTP\ F~+":CpONJ22. _!ixy%h]ˣW RIkN ^@IǮna,RoۏeV #dvvW#pmsg9M;)ߦVWdaBB_nV? Zm Ӵ*cBL@Q"LBǂFzFh-JPn)Zҍ_I8#PcBJgX83i9J} WZ]6yOfZKYh*ξKi` Se'1J[=Ai)vW9$,򖣧5 K/[3S8i}Mx_3IZ@ gC#\{?-w&nT+IޮJ嬯1/\l4;qnWr%@ JBH7@"|f>0 Qh $j"V2@2Qb(Dp,N<}0W]vI>p `͹8jF?#HM\@DYؤ+ Ɉ'P)T9d [/WBo>W]X͵.db^LghPF! 2 !abj.MλbZY%Cݗ'i[qJ* .E a8cYhlLa{E s~i-l)\kW}y"b1)-t- +ՏݿںsS9f2X~K,K nYa3EbMi#3s ˱[h*Yp^z0U';&/#55z.jNɢ%G9*/S}> stream xYIor_A4Sk$``07+zܛnH~p*{4ہW{{[*RvFj#jRIj(Y Qpet -xEwBn>uҊ_/fQ@ f5&JRZj^,/撐PQXͱu *΂$!X:Ú [`M#!\Nu? ռqr86I}H  53A&$/ ='0T #3^Ν}n՜ *3Faxm_Uxν$Ǻk.NS yj#.s0#GB]>عXo)ź!Bʷ"ny\naR(o*8$>r_Y0ݏ_]iWASv(+ۥCLSa1IUR7⮫|κLq\f`;UŇܔ/9v"R@a]>7OxF3A Zlap5L iQWsXv.X@D!.H%`ufhsmWZ;rVv+WΞsja->.~5Fb-utu\YR;m*q}tq]Ur' ZFFCin jy"I׾;lڥv8:'pjL!T{ob}:nwKD0y\w]dϱ w^֠ |SdpVug-m@=m{t AN\bxm|YY'!N?7І=4x{:R{]EsmP۴>mb%崪%F(F }PZǻfpB2h ~*mn6& t [V]kFχ 49ܵb_X2Tj{zho^pVy"-•ChXEδ]#(eS&pmWwG,SˌU,MqXgLwixI9WѼ8 wk_\رdJEp8r n7U[}0m]9MU57VgNQ!s})=]T֍uD( pjoNп z0򪻱mN뫪D078 ̉`ΊejǑEs*U"4? !6"ު~ /~%ۘ›7>#!sdxN`TR74 4]G2^A` nn* C6& ljeYZ4*$O&>~D[,wh>)xyo*Hcd=&TѢ&6}>x5y5fz抡3"U,l5.ɘ.>aihπ'Qwlj.h# W4^)}wѵ|ѯ1tOaNƦ|ı-~ۡv{P6\8/ėn"3JqwWg2XJ){V<4a3݋nFtaPkF uJ_YFsQj5 #e3i>%FScO>T. w'9DOa&EJ u>r,P#Ok{u񨅻5łn ~ hFңb#ڜy4'37Lh!*~=>qkD^t&Oc')I;1tp6NDZ3jc Op-] IOt\33ivF;fknwme3gIL"r(u?L]X=Մ)k16`k`xA5v4<3ܩ-z_TJI g%jj!p`).ڴÌOS626/͘ՆDe$7,oȂuFg)Kg6fu3 tifCd,C()/b(0I=M S`5$#)!2y"H95 vއg$$es:IcS=݉S"O=o\TK dnmb#[zendstream endobj 254 0 obj << /Filter /FlateDecode /Length 2083 >> stream xYݏ" @b4W.R- ݇=i$wWq $g ͙ Fyj?cÌ_j_}9wk KeX ^pa0P-Mޒ_zIݬ_ZGzj*- 6BmWӊ]5rz{o eܑ5ՇSuX=}AV%zI}7h.r?g**Ζ}KЍ |vtH i7uQn[`E~Nr%ys^a?T9Z R-BYr UT1FQ6 TMf!P㩻{jtc{l-MjaB5䒛]Oac`?VqI>9#ia% v49]z}xMrAk!IV0j_!+ {j{MV!ZK\I)I2'a>@2KۂtD䡨yY}B^EmM\zߦoKeeQo/aC(L;Kh#Q8;::_(o ۘ,68S2 ~BzND뤣FMi=fw; cbv?d&\\}{>Ƈ<=b ;Q1T)_yj R P!T<]q;W%rیA;=D0tTBZ,f:S4K2䣹^b˯Y*2ϪB+ %+,9|5+1Zb˜˃e|;։'vksKX 44'aH负Hɩ(mSeݽHxRyȋXfuVL$1cZsfp^M. byϰhϗi\N1*eޒw54~c[pX ""Š~>!Uxb"K!9Bč7qw3tA#SD)±Fjt<39P lmTS I͐IDN6wv.蜻< r=Gs r'$Jp"αQ%宭'bmnj(Vv5JC"<(j$Qh& j;K6i&#MmcyvQ9fw*A?-TcAv!*f(0I<<eN5찿|t;򯺫F?w GmG7$F>d'ߡűq c |'1niYm* 0 ui!v8gɏtn $ɏi"mF +I|sT8l =qCi;[ i8|!Cyٹ @(8F#6bla, =kש@تLR!sfPo_9>]5ڇcévj!6ĝɜeۥWy#5u"U#f8 C:[Day3}W/3#U#\nnl+*F(BEOJ^L,A 9Z Yp-PW,L1DLЩrȐ|IJBĒ `9I#9LD&^0hT> 27 `^1Wcczl ، s6y{T+ORBtNc7~K}~qϝG`t2t+9^~"{HG11 K+'\AZApɒOQMendstream endobj 255 0 obj << /Filter /FlateDecode /Length 1753 >> stream xX[4x쯈 Fa"VI3m6{αlCsb\sqe=ahׇ6߁fWϝ4)5w <3T{¸ܾS161cDU HG7W㚑5,A{v|YoJI &Rż IR*'CKc6J`D`IR"f^⊸ǙYh"y3.9 /lMOnI3^_ѾǜZtS4nq p8MS>M}hڱC}2iF^OBsSx2>214i]ml5bdxxI0ۮ9,nxo멾(o`WܡMj̸Syxz;4C{+~iEb TI ИQ7C,ǂcc4 } n z*}fx)}KYW!ƵB| 4asfGcWFIc xU qʁƴH P j ۙH=I};IQO`DhrP0t+,&I88c3豱|j}Goʻ חz8ǩ,BH;(~;q\˝XiRؒ [FX)$s& .v};\,_4rs5 `eiDX 5fCxTHp%Kߟi /mi:v}mi]Pp Vv C]HG`5Jܰ^jX j#$Cb]]â%',?tx+znS۟JBP@"N\k^oV/«0%_IE.pOχE[XOZV*q|k{maCR)nk;@Zr-bʑc9!K!6Sa;\J dhm1ƗKIQQ(Z A q)u*Cfh ,tm?yaDxj /(Leynvp6h_V2>)OkNo,2K olgYT UE[WM}&ߙ,1pW 䄨l Ka0X,<`@1U HS~Z 5endstream endobj 256 0 obj << /Filter /FlateDecode /Length 2019 >> stream xZ[sDѿBof00 -4JrC=go^5!M].H}粗7 /_gx5{3i-ٗh̨j^5k %f ~ZyUs/KBb3tL*#^-ϘZ/ڮ;I0hSw}TpFHRu|,mVJ66I0P0Ib7 W.z_SSf E$8y//ChZ-u.z[RիU:cT8[ym7\PwMͲd cY]Q c S mFWznb .X+ bh[z|]v8gTr,k&ch,TĉM(U:2^Wg(s$@t7GF$Hs VHp*9 };` 3z4.D&=5X+2;U{ ]FK1U/ T>õt~v$u]Tr[Gv]CŽ Al9sא asU^@sGz׭.#֛dzbŸtl&b jwr|!VF.|]s%9ݔ70NjluȈ廗~H =9tk3̡kӼ *gU 3a=cQ}tÕ\8)O0:7yhwR y upb !\uw?EB2Wjha?Vp[Pf|~Dn p~!1 thb}Fn,`N0QK?3Ъ]?OS Ô 3n"aW<ǛtAH&!8m2Gf{P3f<=i~5أCA03)@}&_րiwenUm62qLQھO/mԈʐ(=`2&VgLq=Y͞4ڝ qp![PݯTo:9-v'EߤR"!%P>%9xLL HsRMs&K&r<Ƅ'Zbi8#%}uI.>bendstream endobj 257 0 obj << /Filter /FlateDecode /Subtype /Type1C /Length 7467 >> stream xxw\y:+15thZGֽ** "CDT$duC$@޲ nq9]֞=yYq},eb&vWz. He=s3&W0 &4|s ߜɫ>~8`c¦I[lKݞqz!Oᝲ#v{ExGDo{ , -~eK>Xr՚9kߝ7 "vo^*bM&|w=—K%%y>b>Ol #"D,$"b JOl#ۉ2b$> v+]GL"&bb *1p#4GF >! f( b<1HE"\0+uqq.]:\*885$RgDeu[nM3eRФOn2kfȫ7Nv{{{ڛӎx'^{n~kOo ?Έq}{3ggv|eׄA#0y$850h̑wP}AGFU;;\:ȠtkCVd W\34$Qh"G̃$ljpL;=,pcv`ηD(B$Q+)9RNcU.Ba$T"U)|rn}plVCRW(Rf&,ICp/`֋UΝW>2ǵKYj"z#FK_fe1pKcJ?.;e4TR*{G dD_~YrÞ3޴#Sb(ie.pc!y)h"\M5}͆ x>"ށnߡN_=&Ry/G#[ NOe|nR^)dZa;Y $K8ʵ hTr6J㣹0I]/3S*!?7tGHLYyA.)P{#\vuU' +_Ye44>-C2 /, 0+GN;4,lZ~߆+qbq@ 8v~ ۿOŎ)\5W.8M =0tnV :$JuԨ/$>KOqߑ,{\O(>]-t᫛'$io>/؛ usw" m`4s,بR(R\ {{v jG Us3%C]I&!7E+byz%#ޞ?b~xo2@Va(nSCK[[Sx:WG5#S#4 ]]ll~#oq ɠFjMWЗ }柄~CL5 lu*ll P./],TBeux g9l M▛RS ?߸ pt[zy2ĈbUaZmf<[9Fsa׳r$+:ى"D|*.ׂzAws% ݬȡ=!ӗSSgٕ9p׳oC믃s`H$.KEUs05Hi@2 R{~LJswם]< OF\(Qa= N 'ЄɆ{HqyPi|edS'j^5r) gE C5ZJKZL OD>36pv8X^aDNzUv1܂sI'eqՖ|Vy9T("8KP'ƪu$h[wS펮za_,=l q8X cSVpO8ȕ =mq!|R(q 'E'#Ӓc j5sQ)6,rlKf_Nv5D/tmX[ j~bcE'plpl m|Q薐hUZBo 5mewRSB{!q*b28P9yTg $ "Xb40CdGvڹ(1JQ.es,mr:פ{~iIQ ZZDmb 0n?+z EN j/ya_vK8c3ř^]S*+3rKLHշTu"2tE!{*O]r*z܏:%S}lVf*Y!LThYO;Ot 6^}+3BД[ 5%mF0wg:$}l>d 06wu޺3@(:q^P|yH |_)3]/8SगY5lĂ~4 Me-;Z"'rz*UjRl^%U+|q(ؔ{wX^B62eghqʸC]t@gIZl[p7B}@ˎeƄOX7႑5PyvRu P%22Io)k.sl?4vw_ERk,L"EzjgMDǑ?ő[ZsHrVuYTjWH+Q}n@CV!^{u5 kp/P1_e~@ S EXd_RM % J8*I`BϩqU)z G]mhg4;OZ g(e,*+w]'2wܶ/b?) 軮rzeVS Se4/*BYۏ YfLA P%C" kI#"Wvww9xTӷ T4=I(:t~ Lܘ-g8Vj\4w86ay̱e3B_`cyi5/qW4I-q@/ (aXKzk9${Bdz<8q \5b2bO?G.ǜ l^ܒ/3Iy@}?hw: sN m`x)| ;;+m aYbPՎ;E!ڈʘGlP62˫ՎVmw)x N&_:ΪnxY'~z3u":1\li^EgBe 0*9rLZ[լս rF͹tKX/5+KKpz-*j`5|37hL\1O0'a]~b"T*gE+ӱ2z7K߃>=%l.o+$2lm7 k+\`o'@cVBV!@}Ӫ^{.n6Z{Ow mu2aEdi08B퉏 z9]G7JFRV|ov`4z wV^! mI<˿!t.vOI[왠ڗ @HyD`O7grP/>ϰn?0xއyM]:QYIUֿhOuҨ'v)TX=8)j oXLnq?BdP]n:TV,JԶ K<Ntp=6;j:&yg_=Ŀ)zk8v^>+h6˼ZGGG\Ʋv,^:eJ SzI1 f .N!ڜ$u^*hJi$Tӝrp%?ϊ]^4b+m!x9$@`Tv +MgHUj4G8zk zY=^PV2H4F}99'`i@Ov r r 9ƠYTf^%abuD &V9dGvpɜ%Ɯ9$PMѷD>Xc5yc[<֕dP1|O-N-m$(06@WEb"sZÌ^ɗ,"Z+u"C"HzMb;XW]x\?{Pz>~/b'uKԾrlzYx0undCH'jh*YnQfUAYw 6a̙GW俁,z"{(fZ!;ׯ`0]ӹDo.R'"D&D 1Ւbc~~{@X a>au^n UȌ̒$WE~va]Ϡhn\'3ӫ$!`mǙ_8( D!K'g{nkPWxF#Dqqb1Bv9kn؃cEcao6AF]wh5>\}C:j?dp;9s@a?(Od*q irY9nG;\jukeƪ8aLu9 P UR`ǷO <[!>h|%ڋ+ vӛ'c$/m,x:o@Cp7c2:,MP-`YҾΐ3ɗN"Ctaz8@_GCFLNڼt3褩iU5I߾âbNpМrm Y q8yk #|oö $m:h9j-rBح3= ]W5&}-WP8Z|Z;<([21Qz>G|'L@v.. oRyzq}{Sp" KAhU8[sZx@7{Gx =;S)NWFjm]8|:Q[>dD 0Yp&!M@%8*qE7ӯF4p?ѿi?`-W8 \[]`7vhK#B ucM g:D0G92:y8M"$ݿ K=SZ_-n5=r[h8&W~o sbmV18p[eːNe{JudW^@-ڼpyjsMyyM_*/lY%D 4 ]<-X+jByq)1UM-UMMI;fޝMu76i?,}qxbվUsȆu5,k#a3.b7leixe@'͖5^&sTV8*sYVM׉4TeVJ 6{Ub))M,~cgjjyY\' RSUl~ j,Ŏ2`*3tU8Mh. EרLYKNLQIPZtCIZ $Q6eUYI\ZTd2KEmU8K_elXxHQUW&lLNw~N42L&Nh2։D7K*endstream endobj 258 0 obj << /Filter /FlateDecode /Subtype /Type1C /Length 196 >> stream xFNimbusSanL-Regu;m  BR3kfNްc1J^_mh x<ݳ<#m uwvj_5_d( Jpendstream endobj 259 0 obj << /Filter /FlateDecode /Subtype /Type1C /Length 4694 >> stream xy\S׶OD9 NZlbUjaN!LI3)$@hdQ-VUzvۼގ\ﻟ|g-5}%ޑ!ʃOiy!!'v:"g<ݶ!9R?-q+VHJޕ ٛ&/?q0oL촕/Zu Sj?C-P˩C;KyPP(?:Jm^QoQ۩)OjzEvSW}7K͢fSs(JH=CqԳ+Fj INPiKCÃ[;z9sb-e?Όf&8rpvwjhx9Qs:\\%"qrG[rd}4!`b+PFQ|[<òU:OyZ>7H6քdΎ(G"VBrAϸq6Yx 1H\sʭF,ePp.@*!+43+XfKM˂C :#k,LXYv `ypxx6_e7Vp5|,KBKGusJ/~ J;.,N]N`%/ZyLʋG YwO,4pL ZX+Vt(ϒzI Rj{&; WB+#451eft瘳!OH6|“H J2Ԛ[j;fُ]xQq-$ȪХJLAgLcݦWD:A[:#MڜKPա1'jk2-cWs')N3,|h&) #7m&6^+Pb(a-Lɴ ҒQwS8sn47 2A~(cJB<̰MdšrQ2UWJq^{)uMuIBɴən_uVL=Ck vX g/E)\vvaaaq>eV$Pl([j5&6‡WZ%[~,UM$ Ql<Ҭ:{.s 1%bcS |$!Bf~eT~V!7LU P&NTeU ˣI5jhGk> nq(]֡-"B&4գցޑҲJCeUc\BkV! d#赃_3'V#*!K#!3#> ;%7 MfIX,ݽ'8v)'(ovj%L@*GGmLJۄXo;fݫs? -@Gڭ:VN|qN؁}ݶwBJEehxEC0YGHc*A%e TΞT# 2i宓5MMswέaVF^O ^d9 Y!]%,>GT]KHA@γkBR,̽vމK-KDV\f 𐖩kkh%s#*IY 5I ަmXIx]4CyujR NRV'q:_c1iA( ŝHN,Bި>ٗ|gz/0s?zD9Ԏ3 1#l&OwN hbe8 3ߝ$ Ax(Y$$x#V%b 3$d.6m+~ FOm0K55n|9f3뛻Os5yb]fe1ʮϙ;  {0 .6&,7X UB)~~O ]+#kZ%Fh 2BN'{uQE ؏,Hl?/.EV}Y$nm9sa>לog7MikM&z 5U[$y7([J<{FloNO' .ZQB(Dd1>nt>@?F贂L ^KjzԏY`<=b|+uh;͠ud~ce6Vgm1jS-ٓ"IL*&ރ.8g"C6dH$ QCdl""::#%%/jMev"'Wk 6MI'G[l ?Z/[  N%7 $"?68j=''duat[ U&d@=0zܓ!ޟlEgEzo]8n '͉fphVݝLXuzm5- )6`'$]RB,u}},}ǘ:z<5L_2vDbM%(#y?[x duKR,5'X}'2GI  a~wL,㷿l7v޿R/enR%‡8Y,AI9 3@6z/}{&o(Kuh{(0PRBͩPqPGlm"bD| `O=bM"3Ɣo60 x:"o\s1GX Xq,?lhkLM챤lIzgG!\Yaו!vi`O/@pҍ㖦a4<};&94H,S! $u".>Se_ ~IS-)~.a'K6#q%vKsZvgT;h_wSm4 oJR'O?]{#e58x VԵKXMX]_X*D쾤ؚS $>z;z %E\5l$k^wOnMdNώzL5f3ӝ3ar:JggBQ §'endstream endobj 260 0 obj << /Filter /FlateDecode /Subtype /Type1C /Length 4035 >> stream xUytSUOIȠMě SEA?APl3MK4MN9I%M&mδR2("@A?QPpʕadN>9,bb%/,!HU~עsRs%*=G=6=ϓOߟt)"ztyyEjZYܼc" D2BL#64b3!CdˈrbG|@"׉7:b=QLL!ci /LhHL<;i19 w2τY̧A~U}59'(#VpC0!ݥw(htB{NCr(Ѯ^r5FQ! L?E7?>_}ɂOJdz@0upEj-Y",84Ui!p7)0ڌ6,@p;_5tc} 42q^LeEwtRln=}(FaFX``L4%yo ݽy3ԅ߁k)UmyKwN\Mm-Vq恵w^ tGIC^8h*[8U\=5JUED( mCh$*,0q y^8/E9A7vՂ:`H:{ު[Pt'h%#wjEA.08J^7+h.\ᠧ,ݒAQ$k ڗ*%EEa(2os.z,NbE9>*ҕ F6`[ȫQUgXtB]IIn=9ʓi =7ޘ'^x|F"/َUjsx 8K(@o[Jސ^4E .#UoH5 {3.޷T+C/d @F7T\>qQTz"y ^BO7=ya^|@>vn4 OD=bAUZ,Qh.=Ђm{q7JL`[dM\cif“C`WCLW(&X7= \L9k7X,-HVEkk; TཕVG^mxT q+TKYP~> vǥx2?seb'4WPb0>; \)u_Z)w_{ޘj$fCxl.L?;;=:pš- ,NH+d2J֫bƥD.7U1)2 A]ڮdGyVH^MX%jxt@ YBR.R̵ t|fɦ(xɚ^:+l!Sr$.'T{G>@6jdQtdE@ ܳ#a*^}rOWOu^?+*Gaf>>ǷQ&FAh1KTYaYYa6@Ba5ݠ \!8Y.t'e&0gt'YXmVun B3~.'xa9-<9pRdC6?mF'V﫫^p%ج7b6NY=PT62=-d̤gf.:`K&ݪ$_E5$⥣K~5P I4E8u߷ yKQ[{_c۟8yV_`? ~L=p]T[ yx|/(}^B"4o\*I=?\9H=])Sy7Hll(;M,4" v3#wBԝZ9NqkBUU_/@A뻌a b:+V8 /-##G5_T#Vw8'm98;*ʬ+ h`sz͍%l SǸ?˷cv؜ZKpg.<\ZJooy;lC^C8kZά.$;ĒB@JIK<%+^I ]Ct~e^f@xc҅k/{Q*8 \Os]m:`[8%ܾ 9+ͻKͻǐ87R_""j*:V=\hBݾ=bLS+]qXʩP<,&kXke2Z*P՝ɯabgxI:7he]F)5j\rJV'7VTͬhzNxR5@W03!_صXЭi¬6: W+dM:s+Lٝvg1|am-: hϧ&a~H5y<,b n-:ӪZyW~3Vor׺j {孅RBQc 4&z=p4=˱Dg`űQ]e^фߠTxT"VNƷ;?зgh{ztMXTT\|ұ퀬i5&$R\gZaq=<}剿`!aX4=\e"`2i9hs1zE)ˊd @*L5!ƒ C?~opHֹ,.Мw0Km#"g2Vpβ*5 & _<1WQ# =x^ɉ#Mk̥)h&@3#ob͔`5-YDC 7UoL5q&5 =dK~>_\^&.wE[::cŸ▊򨘚( 3p[<Ɠ`80N&&hendstream endobj 261 0 obj << /Filter /FlateDecode /Subtype /Type1C /Length 5550 >> stream xXXW 2 ENlAFDA`X J\ҹeX k-h,&AMr&%]ʎ~}}b?~J޵~Cl[N4&rl2J+L.a]^XPR*xז4iW$CC+FpA4P IwL$\{R* qqZ-~LGuNXp_Fꕻ֡E^ rp &wќ8N6Xyoؖ_iˣw4pWm;3`.Ğm#\nYh: di( }^j8SYT46I#9-#)Y4cfo`s`ߖ`7rYIE))($0ʓ Uz#p#."E^ ap~^D/^ 2ƅ} t^<,?֓m`\מ.#&z8qoyӃJO7*&6090ji]ɉJ(oD]Y5 g:uW]X Vq<^W3z(l-^^<|]i\gTf8H,oR쳉kopG-/_޿3'6z~>ttG [xv|Kw&ݐwi9x!GHP1C$&$-Pܤ4Du }`3͎juO݅**$gg 0ESoޥ,܈#3RCM: [+;0XoV;+!.cmH B+l^ NB=W9ns&A(yقoutFc〗D4JKHwYDK Y_ѤP=EɮIj$n)YA(D{LI45>K{1-aTh}u9{(zd]A#@SF^J|=*>G񋏚HHKIFLBJ_Y1g/䁥ay{ ˳Wui I3>" =_ONQ2f=m6 ;1x[s(DbR̍?I8[-FhYđ=N]l0D hG[hχ #O28C՝ 2Vyⷑ.4y:Rw|F,[۠7XkV<{ݰȋ/ tvЅgOqࡤ){HHq])IXGQWlXc?ƒe}TtmGCX.+k7x~'y qF)%Hy"w}--t}`zE9Z1UabrzFRbO&Z~ZǓ+1֞4J1/G2uSbDřN4hh yC^E8RM+Uf-C7eF9q#~![+5\\# B6Ȍ댘i=ŷg pы^cd Tw3\4Ț8g:Mtsvr'z $wΰuuz*!fR+/ڳ:),%jH\x⹡N#;A9XӒاe&6LQ#L7~a|ъZQL s8G4s@~l`'0?br,p{ Kw'1!Vl g_ϞHz^N]I)ؑo.X3O\~,q-w' o,G?ń;yX3!׃ks}Kτ\5̼2"'>A|Dϣ鳨@~$0DoEノ{LLjgSw 6Z7On-x$˴`PƫD9EwGUm+lhz%RX!~4"`Ŗ857r{N=0ݠD/4Fט'7W b`΍Y3&.p76Pb*?c%H:f2oMHOAyBg% !<1ܧЅ(OS*cz)۰素<]OWd!ubKțd}wܖ!|.B=_\F|dY\>/ytr𬎎_Y,'.0h!D3Vž l+1_g:I`Ue>?& FyWwnS䱅 E%j6G0]9@_.'mH{+®?Lt"h)_ch b/;%;]$l;D e_tȃ6R"up)&)?I)qwi\xA?}Uj,L7d`)НLm#ggпb&Y\awբN_"\:ާqPRNQ)Z2irôm$_}mFCR˵Y()OF(l_hW.⭗ut3Wߺ(D%#^ E]tZ/RuB{1O3`TAO, $T ;Jʟ]63bŷ? y<օ|ǁg(!G;ȑ eDjٗ'δ<8uPy&UP[[/r|sq3 [yoz5(UTq';K:i=bn]u7g'B[ QL~+xc5$SP#i<Ke{NGuvNt@*B`CR(L_ -:w2[Сn,Vׯ^ `]&?xHi>8!z{솵A̴9|wݯPf}s#XХUC(%)3&+F;hA6QOw3}Zmdő$|uyb'(e823)lEŨ@U^ J+L7 m]62}o4﫯g`/ΆجBh4R 0nAҒOkm/񥩻+Ll'^v</ybZ.]vI+OGNTޢ eٙR&'XOo TC@-'2rc>q!*0" ? rddi)$! J!6u1V@7@<l*Qrr&/#DNh_a5BCBmnfvF aoonUv^V6WMл r.;+!Q½k{aUFjfJc 4SPLqzgz/>`0i{`8x,9;#+ 9y6QRg %'RWjԊ`rQ[XKO+Hxlg^~>:?8'gf*ryٗVI1V mBW6qsY#K㖖҆E=$endstream endobj 262 0 obj << /Filter /FlateDecode /Length 1347 >> stream xVێ6#UZ/ ݢ >ZĖI`$Dp93тeh}5K.{]-̘,<[=̢ s52VgYK|S>w Ns\ӛeo;',kUP81 -YrrGG_(ih}׷MX&\qNS/~Cn'G洹Vpdq9:Bu& 'M.F.6; R ؚ`Cs=iwP7:g0P~暴ObMBtǐ&wRr,Udq H zPH5Z[L_D2F\>1$h94OrZR dQRFWiq]9RPBhz%~YL!rIS?Yu atyoElIylBK6`Mzcs*d(T0ݵ L!dI:WZWHRCr& @wd O)v'OV!V&tTH9F&JMprR3Ӹۤ9GmY۶,cJFhK2oaE#梒U艒tka3O,%?#hܾZAjPiMT0ncP`}%aWW7Ż9j#w}/O`g/v"O;RϣYh.T dfMP<($a0RUf4D^O{yQ6 Z0*QX6r)\D`"JI ~E):Y1$ w@] w&! d+ uPgl`Ƨ~%8^mFCM~ a7txoCstj8:|K6^"s x+6L4qV& 3D n60#f,ܵ -\mN!|ЩjT޸endstream endobj 263 0 obj << /Filter /FlateDecode /Subtype /Type1C /Length 460 >> stream x>StandardSymL-Slant_167+~z    g  B alphaomegaaw; ~e+N@q N0]ZxfkS\oC(3)̸xyrPS{qXxfrjjGtJh%cZYll{uʋӋәȥҋRB~\**@{wc<3KLxgtr|qYj}ܣΑwiiwaFZkH};jaYqrxɋ̋ܢװUhl~8gV%!ǺϭGjNj Z>keO5 HX}endstream endobj 264 0 obj << /Filter /FlateDecode /Subtype /Type1C /Length 5777 >> stream xY xSն>*`4 2U/WFR[ZC:OILmՔI4TBRTPEWTDq{$M }(dk_Zq\&PNNNS}&o,^'ƥᐻw,(vuWgpui{l*}Oyw*zqmx}BKI^SRC6g JxD;cb ^dWrm;4_v*+6/>zcGǛs<(a)y5?H$rsn`óԳ>4X[Кz Z6mЉ,rydBZ%* n*MEiV[5`TTherp5 werл2Rn]/tO9YD7fMur&XX49) G_Z~ˣ'՛]L+@ y¬LHƬ#E IP"庡8NiBhOϜyєa/b 12Eh.d }=y_ QҠ`+:1 bqFoYX$x:4__ٺv׶˽+&RKT\wumJ x 'g8o!~ Mŏ>u6׵BӖҔ'J䩒=`/[K>.M.&H(+ P]*j*Z*TZ}Y\͜L8Ă8}lAcB{%xMAܟ-!@f 㠯NcԪ\Tэ/-^2M"0h5=.\AZo-ٺ~+S+*&}\rkr tW1<b!5wY '=^o %eDB tIu]`s]W{ckYwf6Xܟl- ʱ PdTk52H -i L B(EQN,+FPu 01h.8"i6TKIuyPo -jmVV-ђe*ep$} ]AI&td` 﫵CXm,=rZk?ܬ.)?p/I 84v_;f@_C 55j.5F WF;݉8]Kpx&8l=MHzeŋگjm>KB7o3tocٶ7<ڦ&Ip΢,vB1`5|F ~%?зW&3nAo"o*ܤ3kjgrFK4*9rvemŋ}r |?ZOvt "U)>Siܼhq;;Rj"kД߯Ig񔀵9YAtB7́Q ̥vi"W[+qO@r4b}prJ[1rawF}]!sFٷϼqiI<Mǃâ.F-_v htn&히/dRt {|>> XB,@:KFa`(zFGO Y9u9k*ɞPMnH}C]i.DӰ~ORgB];,k|⛤f@'ٟE$ZFgVǛzzm6тw]X̌u#od{eZU-iC@DOj;|.{{{w4WU(-`OAGn9: !*;*)I*0+|~n6[Z8ɬ?=l|HKXܸ~p oO4ck,ITPVu R# Rkv'󃃁IE7FYV_$\P =4Jk32oJm'OS/-3m N]LKGZ0N$W^ 2TɊ}䭇Ⱑ3w@/t[IdjT Ev) -,FZ؉V2hp9 5&;pGEV2x*U+vZ6V'pTD"f{%Kݖ$C4D@AQUⶤl3t#(_JV"t_GC+ ͷ6,rZ=9Y%ܸʼnȶ5! <n?v8\ċ?v2U8lpfFI8u55r!YQ'G!q7a Ж ߓ/.Gd Zik~F#}LzO_|~o6[I&y?գzwߞp@I0V"᥾["<뿹x4q%k=M4`|- /8dv砉pZ~+*jX³*l:*ww.Y=oA3\GfrƂ\C:ы{Eimr64^:L{ ׈N'&:q4m 3mdq#,4Mi r"i Q"i =Tl1(5Xw=}ǠOHӛv@͌6219pgYnN DB䌦3h6Sc-O }?ngj>qj}k_-_ #Y3lF[!r<yI$ ra5>jAGO$ᖌ H,ZFJG AQzum3[tyb_5k>Z?eje!o#řIGy#1zaua1Q)5 Z?]OZ y$FadWBFp/>BDLvcvpv$H2(eۻùL:"..".kK~$J(a_ !(#*"_,c2 298U?`1__>zo\R"s7nb#wrr!3>UWR\Z'I0x}΁FѼ!n".O7voe9/O޺=="T1LbSj[CTTQ[Us˸=2$Dqk .tcAY8f]d"_AVw6.5->.ˬ掫 )yIYpl@ 85U(47 ^KIZ^>}v9vĿYOmKhLi3Ti\!SY`5$$<^O[HwܝGi|Bj >Bz8ƽ谰:?th!.s&YP8qZǢ=g8qgq7Ӑܮ-ӔU7"p*Z8^Y LBJjŽ{z#upAӞREY;N֍S MywBiQeQSk155C۽9q5,m :/}8]]{1H92vtXȶ:̛ǃm woQx ㅃ[ {&6VUuU XK+L8ڒ.;dm̱G>>b1eu"/>{ΜĹ&΄ ߸1L=l~Dٺ )xAy$e22%Ed78ܙK ēav>/C`FDhTxn`WK?\wHlB`@^`v;pϻŸOFϿne+JM)%<G"q.^屁&ŵŦ<(_MZhlA})!V:,R <R5GEzJP'6''Q1:}iYySqu%ZF NlVx|i&/bLɕB'(5eڣ,ۘgr,їYסHu(.,DِS(TLmnSmlʒ)OXi0 .,V2B.1?/p"QSz-1ECv N]. %"U$47굇DЄhhBn@9(a7HS ДV+3Ջ>X P5g[SS[UV_P )I"UhX͘3K•bx&$$Tewq;f<=^/8Hyъ[VzV__Oߞx{NI߻Eli1endstream endobj 265 0 obj << /Filter /FlateDecode /Subtype /Type1C /Length 2800 >> stream x}VkTSW!C Q[ҋ䊖N}h[Q B}TjB}!Cބ$@N Sl ePjU,*VrVGCgI;G}1üHlT҆թ[l916am드\pI^*R1k=xȞ_MaSl*c?HX61ø^McL41"&gBw&Yf1af.c3o3Tf gF0#aL3I13ߋ :( cYY1϶$aSO>->oo-i_'6v GO%0TY&m-ߌ vvF!{RcZ/_n~:i5ٖt pqz1$/![$Ԥ7تj7ψd9C6M,9M ?Y  f/^y*z ADM ,m%$Ad!k$tE#TrCSJi8>Ԭ8_ zΞhr"?gG{Diހlϐ6Lp8Ѝ~3~lP6baDL.Fd 1AdS2Pǟ#*}-Da;~j{ݳWo#ZhZ݋&fdUi5/Im.+˕};f,9ӖZ)aEW`Q5#3tsGn Œ]׏w{FHD/l`=rrҁ?CONքLR\VB'!F6B(@CuA$-FmcmqHd?3g's0kz+] %t2Z@HBU.檪N~+ڰ jupYG>턲RBӣox$'&: 8Dž't8#+\4Jv+Թ\ihYG/O{;ߞtKfāEg p kPvcBMH޿ˣ3ФT[JM ]qGZ7J-E<-3_@!O[qZ ?nԚU$,?qu9t~e/zc`?||_>hڙn` cRބMÉZt2#~%]k2V.ww $eŚ *rUfF +9" #}ޝ5Ȧpソf|:m:R.X莥R娔ɿGjT%ZI]"FCI<NF_BJE-OUUFTvFou-tDЛn}?v;(P :-Us]KHeo:ot]Ѝys|nH ~\"9 BAD 49.z>tt0NWR9|b_?/& Gendstream endobj 266 0 obj << /Filter /FlateDecode /Subtype /Type1C /Length 125 >> stream xcd`ab`dddw 441%}72@1!EG {33POݕrL++;WO|>g7W7w79=%endstream endobj 267 0 obj << /Filter /FlateDecode /Subtype /Type1C /Length 114 >> stream xcd`ab`dddw441ź|eaaThn hȢ29 xq^^qq^+/^!'b>G0endstream endobj 268 0 obj << /Filter /FlateDecode /Length 452 >> stream xuR=0 :WhFէ-֢Ptۡl"M~Ⱦd)4H"G)g|03Ֆ_me\PmT #@ 9QuA:[N1˅&eMۉ<7?}MکZ2,|&Üհu,ƹХ!mnSy-,PIO!d{ҩN~Iaݴ=JT8c[akPG a qNbi?!؏]aƔJ[n*\k$3A){vugt F  ]/kHi> /Filter /FlateDecode /Height 480 /Subtype /Image /Width 480 /Length 1044 >> stream x  Z{Sl +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q +W0`\q?endstream endobj 270 0 obj << /BitsPerComponent 8 /ColorSpace /DeviceRGB /Filter /DCTDecode /Height 480 /SMask 269 0 R /Subtype /Image /Width 480 /Length 43969 >> stream AdobedC    %,'..+'+*17F;14B4*+=S>BHJNON/;V\UL[FMNKC $$K2+2KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK" }!1AQa"q2#BR$3br %&'()*456789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz w!1AQaq"2B #3Rbr $4%&'()*56789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz ?Z( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ({xIcEc ?GoZآ% PE6089nsZ+wI5 Kֆ$q# $BGbG4no=V+>W ᤇC!y5f}'w k->c8 ..#<.030QX r_<= "P@ECxznVCq &9:1FEeej0\ hbtBK@ho¥/['y_31hc:/ٶm΁=KEdZG{z$Cϕ,H@ #(3Ta?&4(VoGo+7 +?7G ~hB fQ4{-ݴ 8#efUSnA@.n#)!gt̖I4 b`2k3\k٭#{d]UPAu5)VgFҰu :L9$*RF85Fx`-=B8"DqUg l[GrU]cbn#I -~: Ƿ t)r՘RhC!gM~4 E̖24WaXv2DgQTV:zp52DTcxz)b?QX_Y\[Ir"AKoPCEEs!Y δU#V Eq ~h fQ4Egߨ>{AYMhQYa?&Vo@$䆝'NMU%H&[|׊wyP 8ΣYs4bY9jո0 ,GEW[Mq1"8P3Mchza%JGV ? .t=B[yuQ` ntx1ʁԑȬjc =F("Dq Eg4mq^dD*H?4Cq(b}|Ecz^\Z]<=I$4C5e}'wP\G$¤)a\TΙR]}?.V!A{PEQEQEQEV~!-SZЬ&-JN$febqFmE]n;gۻ}~uKd 0] ?Ix.4YKkm.2+ t[ ] &$!cK rA1co>z= bQfohGjwϞ29 W[V2/+v\ K5,"$JqXŸM"PM E:FA*EiX$E*R @U7-? *McY_ΖoZ׬TܳhQEQEQEWSmq/j /}y[%T_E9?G=_/y_W9QEQEk!Zk!@?%hV}?%hPEPEPT5rUǜo,_ǕsoL?i\SAmgm?"-QEQEQE!˿BG/C-_(((^+_BxX]Ckb!GIcoDj$BL?Jط#$r @aTA$bɄr3C4 'Z[<4j <6ot (((+dS{Q?$~u f_+@YQҟƎLmwRҋwW2\3&p1ɮC+rΣ3T{ W+&!68皿_pVR~:;K{*HaiI e$`x?\ko>A ;k$h$="ȒĪp}p{Vk1zΒhlbre} MXBoZ׬Tܳ,Y(5g_q:Ц3¨$jugh77Xo51sҀ15}Onw,-k8Qǃt/?7>b5$h"f>t(m\IzsY:~F4;6ZKnHܗjMTnpNHOluJ1d?HEʡ>ON;KS1 !Hi=*nu r 8Mj ,fܐ.rNZ<&LIؓ!p2ČgϾh ۿ Ǖr_Lw\_A4KrzT4_?s"ֵl./ل|m'=sx=SXR͔1F+#]->Ҳ>k(s"]ls@Z:ڍ>uvdcwOnh-kWrjDponX+(<qZ/:] \h˴DZ$dpqRkzKYGb!V9\( ) px88 QV9o]T X8lV}?=hV}?=_S액Y_S액@5@Z= w2N1Tn ]i4F+[4ȑy{YYC.2r>pG~ Csx G`u) e}J,F(fBA,T U6=Ikh żXQ9k:֕gZi%i_QI>*MoI'ceݔh@O)KN:ju4NwZ]#|7fyAQnxv}ODG4ц`}_}[fG|| A@==qտJ}WͿ3LmqOSi )]Jt:巅#8POk NMQRlqld c=ym_%ͤ"h`Au隓if.$ff31wTmy=hmJmb.t&fy0UP2$}>Z5_YA$Vr,u(x_&Aڤ 5/-58kշh.!2A`m';rj{ 6nV1kFqJ.n}y-(0w zӬON1$2{bҴ(?G/C-&o?_K. Z\u_[inod`HS]`iڔZΖm_±)aNN:hk[,큸@'<*ǜq <~ ( ( ϵrg ϵrgCvo+>Cvo(((y_V9*Pֿ?@/i )}y\6T3A?@((}azKI!˿B/EQEQEoA藭7Z1\ܿc[ZD"8cB mo1Ow#fKF{m'FVeDF)G#=y<@Q@Q@Q@rƉkٽ3HJ*qkjGs$ ̯7P3N芐HKfr~-̓@iC#y+e_qlUoyXy^vs>_l}e+5sVr^5O#T%lo$lly3|ԫG{y:>w|ʅpwFGt>Wh_sGq |vt伤9el }qNмA5BN%hoSWE}e+4{i˰ɧ˷vz̗AemDK1C9#֮o=1}FE2HF*?*-(.EqKP@ Coo#K(v*bI u_ty[Oe+4k/_9u_ty[Oe+4k/_9u_ty[Oe+4k/_9gid ~21vn7U;? 7I}e+4n>vn7U;? 7I}e+4n>vn.CQUb%G?|wݼ,rE*89 EѽHd1+,y)n>vnY$%V( dϕ_7U;? 7Gϝ_sGϕ_7U;? 7Gϝ_sGϕ_7U;? 7PGݤq=h&Fef +,gK}b%d[]<XH `0p] i0K"cEREZN3¨>՟iz2A[}B([Z^ԬT_4Hȓf;osV4&Vu)ƝgIF}f%4ānԎrXG c0P?XZ7!{ycEp0,vNx8玆t|Aņ;p"#t4 meIJ*%-$p34=bhI`f2N\2v8I=mBYb坡YKg"ŁlNXTW_.L2-Fp SGbCpEM@&zEI}?!OBטsi{eL%I#ǓP7qؚMcQNfԮduQk ܞPh+8w7t$.W=31diy4w}Q"N,/BXN>+N;U[)޷4{fh.n[ʘGG0;VUSitS I4( n&cV3Smq/ /y_W9POSM<%]J1ϩ#u[,!3;[p=J,k$Nr=5N6;dnV§[*r2Wh/yہ鴭k%弗r$ʆ3zޛ}tַ2g*q/MNokXBA%͖$Ok$H-fbx-G=dTpW`EzgnCօgnCu!7Zu!7ZWPm4ؖKvڻ5-7pG=,2 rV= ,́X=7FO֓R"\!ی2~_nK}(akBGqU[iji\ȆE?1P@'+6yg 'MVL(N[w5;[Zxh" -ͭwEOat k<~k_}[ q\S4E>Rk#|~[Tzv"G}tGMJ wmJ#qrO%t`./Kh%S,?V,wt[hcxǣ kn{yRXd0kLMVgvtr`Х}azK@,dY  dz-]- 9aRvO}{@ g2bb::j 6 c-)tJ, GJZa/.Gs M5B&FU{6ĝNG_jn5:l缉.F[OLgzn_ + q.Q<}~\Wr79 -B i[VQEQEQE*<ڃǺ6P%bfh [n(=㑐`\fS^[*ͼs+'$823\ D[# XM+\*ԧRnJ<׶c#d8yOV8hu veD/qrۃ$CM!HP$b32Ss>K.rgТC:DY&(: 999ϧP1L>.Җ(ةtMè!h[B O-b? *ΕAZLtEBT:WokJ?>.g?Mnos%3P'04wWuڔX0 %(ϡ4?bWЖ]"mC#BHg8E6;0ЩJe駫ggEe4]2v&[LylǙ!?qZhUw\_A4Kn&cVWOWꆋrz@t=AQom T;M}f4dɏ͍ #N28{TPb8̡Z@qQ}Oj}6K{;xgd7ԁV7\B7\݇rd Ϻ݇rd {ckCCswlқ;b!Al/;TPl>mI4GKd t ^hbif Uʍ2*J(y_V9*Pֿ?@/i )}y\6T3A?@cc$ZYۼ.DXsV cipFvt*#8#*;pOOSE퍮{m YݲT3V(GC*" * }P~o?_K. ZMܿΖ]׬~^XaE =( 1[%vvnb(U`rshMo${4J\}TPYt[;wR8ȫTQ@-B i[VQEQEQEuSiU gik2M).Z_:@*Lmwb/\JD]ab?
k>"!'-"dIp:PR&O Vxefy5ov(C| .-׭DE!()k>e?Ə/4ϴ?v(6_oWhѲOC| >k>e?ƪ>42"?J;_$qG"4T(g_ΞJ` ?'ZѲSr$Ep9 ch@ ѲG[Q;_/4h_}ch@ ѲTkI-Hv7VU|;_kn`Xt!"ϴ?h_F-?v(C| g[?lk>ϴ? Wvbq98'[zVE16U@5-QEQEQEQEjz-#tTXd@z+Ԛ׬?BZuOȶ7W ȶ7W&Bʒ;p####O -C|NK yr}As:R*ޓv*1rvE;{V#EgX%`4 @gsNKr1l1ܡ#=2"m٢dNYV%>&,/LNl[T%`0k"z^t]/y_W9qַ_FYh 5jCbnk=K-I-Rb>cW5Xȗ~(\Ϻ.U~Ss]k"M&KYta$.r6c4e41G*Ft`FGӓKu ]_LG\%̋kCkįT$ْݲ}qk^|즊,F"3mwnq@?=:?w,4kLYL8,I+N7\hy&X%b"qg?-M]nMVPȶ7GEEPȶ7GEEPȶ7Uu !hf.`5ܬ2 9٪y_V9(6 Vu^4JItܒU}WͿ3LmqOP_ȶ7W ȶ7W ȶ7W +eq̊G.@LI[f%$vX$ZL)ty_r:X9w^Rm=/:o.o{u~m=/:o.o{u~m=/:o.o{u~Ȓl-?ɖ;2Hǣ1Ek.iH<7:n1Z2Dī,Shh+9g#ZBJb:z0x=o=L%%7(1錌 ( ( ( %TkZKTր4(_vaq89(ʯ0PONs$.Yn%@v| ga5SPD pce*q;n rr]BXv6PJ)Õ9*88,=94 &ݑziZ-i8䓎€qAOTkwY&i%.8|c8w1L..gspX.>G#'QiYT8d)-u.MErSC?&,/K7-? *McY_Ιam N,`$?wt=vjW[yB7 $*rOl}ݧoT[ #bDya$ @%ONDc.e?.7 y9:H]ܲJT.'8>Ìj෺kg 0LNx(aSƝQEUw\_A4Kn&cVs>&S9%swpMی䓀&N[ <wNP{!eg}3DO? ,uMa}roA}^Mv1KOFqk*M *r M\E͎Flk"R0Mh9`'Rv"+V)5cHM]V*ZtZvS>l Z)k!Zk!@?%hV}?%hPEcXK"[IkP]!A#ؑPY}MZ|shfr +($` lq@W1mVLҤk81- iڼzx# y鞴U k<OwXOu,r$*S2_z=H,d+I;UEenLr}Kмr3޵(?G/C-&o?_K. ZEOV῞, n69 1ӭtVny>[aY+GeqkO/,CdҲ&YA%q㌕uW{RRXK}/6`9zWa@^ -}I#g?_kbtIp6C0 ?_@ c;s||iFVeDF)G#=y<@Q@Q@Q@g?赭 %Tk@eFudhrG z<`Y2]`kx& {r:`gлVIRIRn]e`"Dl8N? b#KK:0r(vfl@'I52vWF)*d캾viz,7ryis1R'us,٨UDB9 'o ft%5ӌ]RDqJm"~<N@8ɮM!4X b8=I'?r87juT)›_Y;c H;s>*rY(5g_q:Y9i^QRkrt[Uԍԅ ȃb6H S#nZ%1,- v8͜=Wմt9H`iz=Tm 5M.FrVFɕ GH'tASo_h\XEX/w$hvgx$^H'4$IuUE\nF##?:@F0ywšD4Wq4F`QV7pNp0zf }[Q1/> ϡt289R4uieX\lħl!~`sCvo(2Aӯm6%2I4q9݌8?QP[[˭B9om-`+$'8E`k^-nDF/!Ðpn{zf.-W:=VnْX P1`@ KFӓI FD]$2iy_V9*Pֿ?@/i )}y\6T3A?@[wk5tS#FY:vsͤt,bhs`cOEfiuҴ ,[FXn}f4K{ThIA=ZiK0L#,c{%;x~(?G/C-&o?_K. ZXs-ͅD72p #q[TPp;UKOF1"0>x {{(4+oTԾ$<"0ߍNtEahSEF[pdKw628P"3,DLё-0Db?B3X մ7H$@kbW A$ BeC%5+`PEPEPEPYvo}I+m_60;|+*m./-2xpq9FO<}Le}Jj7#H-heYXy18ip4niwq~U1|5K TU/G&NÆȩ3]g)zo1ۦyyaMКG6G|f%n2jԖ#f|p:}G#:UcV#!$ӎI2[}&Zыs(R c==2M:+?ڟ _Ώj:Y9i^QRkrtW+z7WI1H5Tg*I*]FѮEQȲ+2q@[(UCOXfw'r9'adVZEtot΍܈Ys/0I#IvG}ie's?3]g)zo1ۦyy{.;TmiBbEGm 7,9>}Б..Yfx8q".r/HùbpN2K 2[sɩӵ jJ bp>V8Q4bRXžf,rI'۞@8S2+-J%*D-[INxqO6A? 5hؖ8݆N98=@l/sjS$OCE*qk،~!EF !('ڀ8u_j#]CŞs[ ٘BnT,I'P?N6/,e[cv?Jwwq$7-or..>MS7o$-WG 7KRx~Sh$Vk8m%dmсzh滒\KIoA7'Ѽ1yZKo%Y[vM6T8F1XK[V7))>hկdD6?7}͢Dݹ$1v~?<1Rq1p'֥.VW/nMVRyuXC.Z=xϳj:EVٵ?Af!tEgS mO/g@CZ8G%'ٵ?A2[ -n/hT'k;q@/i )hı -Jc5vơFm{mO/gGٵ?AhQYf!t}S mO/gGٵ?A?ݼ,?X?)j]:ѭ!uQ,H32OaQ\Y\$%H^t Vٵ?Af!tEgS mO/g@Vٵ?Af!t[Wރ]/[56%żNY8g%J kJ;hPsſS@wn#Q;"2d+^mo忍f  tؠ3v;((+M/KEݾ_# o^[5.]5ɺCY8v3C=4..#1! [!MV.ۺGhCc p9Vy3c<|xIFT|ʹr#QϠsI>[Z4[le8ntǗO=*O$,2ꌏ摸q\x;zXu)Tpr 8#?:®d*|Um[pNA$v (N@?+ (?: Т*imy yRG#Fp8Q^ܭZڤ"Imx}azK@ OWaGu?_hQ@i}S|VOWaGu?_hQ@}tpCuh Ud!ImY-B ( ( ( ȗYxͿaLJۂ9svzǼ:!Hc2S tY\a0#5*Pϋ?Vż7Q"Icj$rgQ'ʹu-eK-ۤrDZ-|Ì9Yڏ젺nDFؑݣsmYxvE2xmʡ*q<:3UE+:J-փ袊fe 9i^QRkrtrgТ>}ה8ěnH]ς310pSq.+;f%Cy8d⑲I DeXHf*v9bq3jol,0qrB,0vOv{mRm%viNMB;hbTdF2&Y>spx>PKR44PU?;Gp>5uinpZ W#ZeƱ/ ]yH$n126CTt]: C1$QFG"CglvI]v7e0 w#f{k T7H,>:w ïCҫSAu?̎k$%C)V?o\S5 tOM><*d;t9}Bh#㍀۸LsZ.$}|l`N{g<(X-tYq;I-y=݂qj=FFzXIoP0$$t $ώcp$^3rrHAy\xy0x:rpҺ Ŋoo9a>Y_00P|;9pp55tiO S#^2) #Hw@$8w'd"!$T{.=RCoMos7דH?4%ռNYGn5O[1oeHnm\G`NQ@ܟ&-V~h90 ͎1g ^n.EY-$y2'#i#ӯ.R'xnባI¡,N>fpztrxc!bDd=j7HRxA8ZNZC20|HN|qN9b?(bX1BrpK 4q"4+*]> `QNoZ׬Tܳ,Y(5g_q:֛TѦwa$)wHfێミkR&bde%v-g9d1kE%>m,OC6j_kDvBߺxSpy#=V-͵e/]I$q"'r^B0<r{`ps]).ݥh#rf! 77 @zSZ64bG*1F+VH]ܲJT.'8>ÌiME1љ"F@0`t<`WSmq/j /}y[%TKHS-RƍOp9l&i71OMWX#k)bL 99I#&1C)I999͋%LaJ gSu99{@~Yo%'@sFFke2( xI8Qio6VHvQ0SL=QwAVh;F?v9c9V-Mmѡev.lXÁiF9hO+n9#nj^[GHNQ ь@dg<GY[ב5N9z/mʥȌO+n9#njbd:Ρ檫yq`+n̘=$`֯AqoEuqJbΠFq228w'd4ӳu!7SjWi;8H碏s*qiqq (@>J]>{oG Rm3V{R&uӒNDxRU=9=o M,2yLss?פ5)I-6y/_@ v?եx*rUM,⎾6M $:xǐzUmm- "t1~awMc.rVP =%.\%ݧW;Zk;^y EK6i?o\V}(޳4L ܒ5f- VeAGCm+&'F7cw=}Bo-w呹rF`v€_ 9cq9?/U[bXXcHʄt$E@xTlWE>$"V/Pk?0G-)_o~(β;@elgYϝ_:|?햿un'&Kum+dA#x=|0yvk?0Ey\6UOLӬio?t{P϶Z?Q_ K{y X*B;[>qι+-R[k,-Ɩ>}1m@w-(er6ehA(>Ǧ9c+WRtۘmF+S#8@l햿SOM'RKK{y"bF|  5c:|XGY20e=9$@TBz`*E F.¨~њ" @q@~k?0G-)_o~(β;@elgYϝ_:|?햿}~ae>v/K,wc7nVԵ4KJkkI4, lz[Ҽ c$$ր%((((?BZuOYO-hBG#FΊnBFJpHO43qF(Oa@3+DL 4(*P+)SH<pz@&zEI}?!OBX ( (K"ġ1X(*P+)SH<pze-b$}Rrg ϵrgCvo+>Cvo(C,sĒ$R(dt9  SȳD(`u*p}Ac>GHȊ#nr pO K H0<qTrUǜo,_ǕsoL?i\SAm{u #H-H; u$P&iec7X=1۬O|CGZآdԠ.5i-Ԥs4N6)AY5kcD[=mRTʯ)'#ɮ6̪QdHϧ XeipHMwu%DGp9rk^(?G/C-&o?_K. ZEPEPEP.!մ9nf`~ض!-˻ʋ>^|0ߎOO5UmX7$F899< rK~or;r1"&9bT02H|F<Ҁ&)YY]8O `ҟ@&zEI}?!OBX (8{UC|K < &O5@U1m; 9y9'ɂN[i0G㎵5UXcXf0$yP4vDWw@EIir$'ˌg'6-HU.}^tE쿇sB$ 7.OrNO֒\KLSmq/j /}y[%T/y_W9fO"bYL_hj:Ls8I,-jrI#2zoaボs<d-&UP! 9yHM쬥@FNrs q 4YK|. <$C!i7l89<sUUm8=$`ք{+)oQ瓜d ϵrg 5qU1=8*?=_S액Y_S액@2S Q*n F22z@#$ &VR '998`["0ewv#vvFFqG\rdTSӹrcל IUT7CdF9p$TQӹJO ׌s" INx2@Ȍ݆ۓp3RҖ ( (3y_r:X9w^Rhvtr`Х ą16zVV}+13)UU=SNSFQH䌀񺜆g(7@Ojھ@bf-SA=ǽgxj1̎~qc@lrs[*&ˣq44y p 8Vͽ;p6#?.pFh3fR;#^ K(Ԯp@lrs]p AK [qn=Ťl9Hsd3[```p(%ƭ"^c~Ax!43͉53jxO>\4X~[u ѡQGg- K*u95i-bv# 2znPEPEPEPY(W_`rxF|rs1Xjؚr6sgn34:-䡒8'ۂ߰ ճhTW@ˍv8|qY m ŷ:lrX#|Iϭ[U9 7yǵq5KDoFeQ*Iufb֑=/.8 ,\aw!Y@E63M q"猞; չl-Y#;m9|p`Gg2O}@*>G8'NIgm^Ymf߱zc8$`O>q>&*p mݜ拕ߗވH徑BWmԻÞ9c'=:hV(ѫǽ/=`mg=|;{۰ DK(<{EmiP]Y})-hvk^16 u|R3MhtԡIR4]Z-34D8 GQI!1A13G9=xE[al"BJ)r fs99=Foj Syh ۗϋqہ=pUq\WYAԊ$Ac$g #@+ʸ%tVYWF[HK{Ƅj2+Դ[̳Nͬ0ÐI>c{%K[ݹ8u Ɏ}D׼Ň(dΪMGHMVŤZ 遞sZ5PQE!˿BG/C-_((( : <$x+bV c#$~OI< ;/s RXFF ?1@UU''V=_G読#+'v !mp~ձ@Q@Q@V%֜Tyyo4r;3 62N\1PHJs+xvPO@kRԵ J;b%U+/}`$q@n4(.c縻2ry 94tE /o=nX`ؗ䷇191c܌%_G4d MC!7q:*@툳Ky?k/3Nxe5c+'-B98bsr0;c_GJHVu%-c;Z )yrD$mFY~An0O8mor.<ۉdT(YK K{g")K YAUheRkyc$8Y*;5[1,$`ȥF:m1Kuzpqr?Zt2HZ.X*mR@{Cdqnx'X_G 51T+?i|W,V?r }4V?r+U?9@ FX7dpUkAcEDU+6V YvDƌG?Ҵ-2!.ϗB%J%cԓKHoo:f'/fImeP`B댉0|LV2}(PCC'<#98Gԩ`Jeoj`1vy>Ԉ4ZV&Ľ989 w?_GJDMY@n-P+`hpb^&K\Zw9 S;/J+7Dyd\id,p31&K\Zw9 S;:{xd1*2Q۸ Iya#BR88*/4R+K{1fX{Qo^y8ҔՄh*THq@% Aokl.obRs..~*dMY@n-P+`hpb^l#̷s~drpOC ?Wo ShQNd+}?\uW,V?r"C%^SΏ2..^rA*C Q jD3(%OĤd{*K7[ibicW+lT"&Bs$pGs "20ʰՁg6o o/0%_Jy7$ZEkpoDCp9zդ-bfPJ=ʟCH$SW,2n ]\C#$6}GƧvqP%}Qjg(-GS;?74E-<۵m9_s?PuZuqb3rIbri:lW&͸F@b G#ѥ\Oq i1LJΣ[u)m8Bd㳏_4dA_G{SӑP ?Wo }Z=Bu+jNTC%)E%+?h_Kjg(_G-sI-J e,#WB(z "XfSFr\zVcYK|M(dWb86zS[Q@Q@Q@ffpS ɅS82v"}N;v檩"?( Imp K#$BGH%?K[\1 S<\I!R[70ZDe8cЮlCIrHXlQA38'  )[pe1rF@=8=(A i7\y91MVdR2(l[c>lc RIeXPH ZEplxH;9p%?vo99n *| $anwzyYJq?hmDIv~l|9$ml٤*4X0e[6r%q =5HZ=pW%pss˹ t{+)oQ瓜d-0I.$/E q=IM<џDymp'F11WF]t/D$ |VmWhNS¨z6G-4W]-$ޤ)/qh3j֗߱{h|YX}8=9in$h%s99=:ZcjZ\zC(T67; tAOed`]w*g趭 ?%WmZQEQEQEVMΝz,*Y1)<=FdK1*ADr4N:. mlUT󜜞\[uRE ?#XMεP=@&$ ~e+sO'-{XgMmN7ma˞8] &)YNXҦJs\K9R48{UC|K <!g1WSmq/j /}y[%T򵡲^7/rUe_Qcᴏe8cjH#{+!p8N@$0Nض"rI ۱4,uG+^k78mޅ\Q:V ivYǓ.7g)Hc\ L`Hu=8CĉT*N BNrrq8{ )JDj"w)^I:r9m;S0\iıw%=H{յԖ{4n@r2W=8Osjn l,d9 qVhRtedu'i=Ub멋hC V;]s ㍦ 'P]Nn2S<`99qij%̯wpiU K N j}Rՠ&I7YG 89szG m5D8|ؙ#`QG_ǕsoXH1F#dX0F;d&օќ!Q9MQ>)gHI`dµ˖qt皩uaeO';Q8rz+'d.} Y%]r?O:{Ru+ W$FE6r&LԴդ2F 29SY/t-֜X24gh9>1'NxX_j6{IDTAVUEfu&P]_EhV~!-/jРaEPEPEPY]<b@e7L$ykB>jI \cؾ6 s]O?#kA\ּ/2( 32sy㩭LY㌌&BL23Q` |=bN+a("[{bg- X9hڣ.Pm`'v-ZPL}59O#`pvV6  ӌq;d }2;E؅b*2p3;1@y$ٿdyy\€ $89g]v;sۮ=$ΎUmp6|W-cOX oI( rgТ>}fzEI}?Q@5]&U duFD)Y7c$`Ley"Xգ[,9HQO=y m-%ٴ,h8Í`$9JTTi q`Drdg8zI'#kLE$!]pw GS9/e/2v\{1}0bo-GrgyIaFaѱCTP4wE?{ȣ1_VkX,y$'m6@N0`$86IAO"o?_K. ZMܿΩjfqiX`fI[t(d;L̽^Sn}jlKKt%OXc%9?gWwi[7UrgUU9'8%e@s j⚒FTf/jfICx?jU5 UN^ԫQkU*No{o/ i.ʊڌAAu%}6NZcR`ad_GcZ F/ƍ:Y-l9\ȹǥ9=NDX-ƗWĶqC,Pwʤr]p1'c]}GwUFNqm (((+[3ze.]2%7ONvkzkKv ozHPnZ[H(1Mߥsڞ\/Ϙ {#9TE'9`TOo3 N}q@ꖖ%ޅ9e h,p2@lX?_ӹPJ62CR:ֱwEQp䉾?#G#ʚ$ºx}3b;X9#9A.7t y5B9 }p2sP@Xe!8ǫbKx)XS4\u(%`q@`UƩ ٽqA"e[%OMK4BGSHgEgS>{@UhQ@nT?*-TyZdۆK:(cP[8(Ǣ\C]rm!P"u;Fyڟ6Q vpLɜFw7rwTQĨSJlqzNݒAaJ? Ҋ)7sHEYQH(((cllqZc]6h݆/!ܗ/=rGtTP-!eKy7矻jQM$m suC>Q@nZ]3&ybEA3zv Z(Siw^Y/m7 OWHYRD`;*$$M95v((((*=B1]pۜU(.li3 Gs=jJ 5ݱSQ@uHI-嘎v8Min1l"nP8<7gs#6h ( ( ( (3uĕY1!2?{oܥe95-_02-6GS[PYѤ޹i]$Xܪ`8h@>8dp>N'bͿhA$éj]ql9p^:Z4P w5ݿPd8玣EQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEendstream endobj 271 0 obj << /Filter /FlateDecode /Length 44067 >> stream xMϤ9_}ѷc.`WnDDf FEtuwe¨~O*HIQ)/GrK*U_J._(?__G륖//o__Ro㣶/=1ڗG_/r~}Q?^h؏1z/Ǒk1f)wrkwRe}?Ud1GAKW+W cv ȃHDd&"YWRAE$G6YGdsОG*DDJ#2?J!;@Kv<|Mȑؐ[ \p|B;z`\E:ևh1y(b8>VD&?G]Gw3eсd )4EDf%"?0 cYpA'81jh ds~Ln&y>f3^ D. ১|@(0hSE*/a_G*"$ 'g6aČs+"ER4t˲Blsd.kgeK! K1$" ~E1D"F~EG%2oim&ֳW""-YхJumOuB\H$=Y$a,2@Z*|~/H66z a>zyª6,+FuL]Hca ,CqTOV`c(щВE8>Y$hrriEdɇVzq4jPEP%YJ6E%FK$ɂV0E*"٧dgiK':ٱfT("d*.*q[{7jKZCKQ}ZA"f2 VؐS*L]Vj,U@&6;BM](ENq)Ҡ&[Q]d.ͩ&׿]6܀_(0{Q1!ƮSE*3⤃p/ÕЩXMF9tcf;J_6Z\+E*m0EZVPT ;n~ ؗSudm :ƮбX& * gY 4U0FucX 0EjUI7`LW@%y$t![a{`; ~h\QDu"0G.E& F&b$0[yrsDL>.ﰎXJA +ysN<\z yŪ*E 6njM3%'v 8)TB)JP @W(D~Z UV|4ۨJpAGϔR23ꡪN7&vJ:դ^:AdP3ΜfºFϲ)zPU̍&3NXDs3ޛbfġ-5#lp6W`Ef\$JԌ0Ǫ)P7# ԃ*5ଉZE~]1ݏ``M֤s?ҺTWȤpADrOKTKՕ"YJ |k!Ft~!NٟUC.?Lч$Btٟu;ºQ W3U7eGVܜsG sdw%ѳR&^'&R Sڴ&Nt]L4ĸ3ݙD:ME؄~&G#S_T~L/ 3&D,s^4$Tzטv%Ӑʤz_.B[5dYLPyn5OL.;!DaOU &|D ԧZ4nJ©K4ߎLՙt 6+-ھ&f&kSXU˲\fB/؁_n *R|iSa!SUv]UGnD:]yҹ-%7A[>:Rt7‘=lztD dKBF;0bɌUGŬUYdYv Y#:ס}&H5c 3Le/ш8T(^bbs*p jDJXeyTDx~XMNzԅ@"^","2Ӑ*'j?}*}-щВAm.OU b:A1mHxUSmx}8`elID ]ĝ7=[ 2?(ÍHA NBTYjBd =*rpt+J7" 4Rf_,x*d|Q,ވg6z#Rb#N_̡>Ru8c*]u:4_:х]{.ahL|y!`F"4JpaA= {A*=~m;x"n"(B!"6GQz":sp0;1&?{ū{ ݆KQ!3 yR>H:xyL{Y^ކFD%QKZ1MQLݎː4xPI!Lq1 E5n77!3qkm&oBnaP>@|Lբߎ RIo i98AAebQ؀3- 9}󑓓3!#n32B(]M痈 fXGuz-gW`aVPT}ikkd %ՍWoUREx;<޸[WHNFbLúEǼGEtUW8Cc XgD rY5#grvFx3~ bD,6)T>"QM>+VVV~׺nȻջ 㦕!VVn㠿^wy+1ΛV['%rj6WyzZ\g+CޭޭޭBfێV[[=|կ꟪UPV[S]ﻯZnD|J5O߹M+C~GW ^s+pV\z-`Ͽ~// 4dq5d/~/??*~yFC^n/ߌ|K!6_/>Z?h5֑V"W iVܴV\=U+Gn[qF׭.s.ZdzeDP_Q&VE>I[d%|-fDȧ0b_4BHdzOyO4Z]3գ{#5dO4n9ю{m'\n{J&pgLH-ݣвIFtO s1'XʼnnS@,ŷ |NjtrAh[.eN唶x1mfcB}v[V9UOsF&5*.q3kҙ۞1m3"w!r)[mO=e3]"[Y?voYvטoTvv1H!fLN%kMq7}#Fs[3ĸ FvL'ߋ;w6jSD$8d<oe1̇v!4:xO:swN̈.8un"'he[:=,{nӅ?t܋ٷ#o #v1oH,u[nuH9 !T0:O!X_9|b۞bvC==r O5?y!v^"JHqR9ӄsEin!ve1v܃݃#޵ yL1e*zoEΌ4 #=1#*: pAlo1۟Q[ul{ 1.)4Б$t霟"'cl^Kt뎋˷ooB,'Rx#ܵ=F^0IIf{5 ;ZauC@``G_x .#41&^b<_F@W/???ph;qDs翿mɁv02̫UТ XYv"cD+Lk.g hV@ g5T* htCm+09Yx~|h%J0Et1[ eP/6Y_WI~iÞڦORnz-b9*n(ca H=;o ^na[*u yȩk ͺt&֥՝c*i?PR5Yz bxf&1Z[|}+ZxGG0[Bß,ZC֯oܹ"aO-tiƆZaCvK#c u@+X<uv鯒W1 K͊ :ՃruE017x&$/Cd7QZ^Mz~Wg9ڢ >[Oxn,sDtYXc?ގ}*[xOF>"wg'wn9sٹ[!>=z;3fȲF흑ib >%R!)1WF.[8ly83X]3cJEj#*:v1Y{ (3=J>Ox |6$]L="R! -Ks!.؉ci73z|^9D2"'# #ة sw;繽̘p>c<~.r#)o?m8l yBM TlފTDZBu}y:!:=C$m&RM3g:f83b.2yVyQ<[y/tlGB/[B"1;.Dyk}CIxQRyC5E)/0"xC~{# od鍼X{ 7gxi&Ҽڅi6`yU?nłܶvӊM+@2jeM+>YVܴZU+Cn[[6Wxv);,간D7- 4Q^/ ?XO28ʸ̾bN}OIJ·u8׵;]̅7bcjjq?,O5}.TO \Oݹ>C߲kEBh@{2FX  ѓpړ=>!C!Fi-b]s?Y-xlZ!c*h;B-\LD-ڞӵqݟ'k`p1E$.Pt^OFD۽`,<"cP;m7 i;&9U>vǸ~ʽO asu27ul}.L%#jB #ǒO-dɐ$ nǍ IHv8/Sn(W!Ķ -[: v:CǼ; !^bG)㰸gG8iOŴ#糽 蓠" S'FksNm L>dnϧ!HdM" BZ0d,[5C AUPלgHyaރ Ös!PfT<.s%8LoyZ)xD ~, B3M5* 8T$A0cM7{<-㡹*uq`IBj ̜-raWAgN7 ! 'Vg\{q4oHLl\!uG2K9_ B[~ ٙ1]]e!b> dqJEck\ 'Bo$XyJ v oOq Ă0OXh=VЂ O] b@!RfV5d`0cL܃ZҬp$v,gjK#5,;v%,u t2}zp6Ow}#oI ҦMi}{ig!-}\GI  %Q0 A^yWNrUmA+5#H̊ΓVd@D{&?$ ,toB[6*OXl!)T9' }Gu.Te1 ; oXfIPG6!2T~j2]E-d&^UJt+Z~X:pB(,n4y.E__g.?wO7zEG~a +71[ɉx3ل{uGɷS5yI,YK"B bB uB gZtM Թ(\J׭X‘V ܴu5 Gn[1井\f`M+>7_4}BY4׼qфutUBw.B҂ ~+|I6;1; ܿ~͜6vpgݰϞ#eÖ4n{odw {׶Զ pr9ܾ'^Q䝎;gWlN={eBQ\χ1s{ ;2$-̻wd2Lzgːl)R琞'!n WCn'Ж"xXtGCginS`!! pp[fl}#Fjhr3H'TC^9Q<"y#W$ aHA|3!8Qa!M<ɿ<=0ץ WҴߏ0 ;A0 cĆW2I!do+)q)u"|ʴd-1ǂIZH*RZnΚx;lJ~孓k-˸q*"]G#ՉnQnU *7NUE1tDZϫf.׳09iA.Gnm:L0XKsY&\>E$o&ׄٺ0H:lv j]#2W׳qëYL~94e GS4T4nlhi߶*ɊVi߶Jy[}ח(l6Ozvd]G"-K15RMai$ND ZKNH!27ʲDaL8d"WdmdDTg$LH"_ *i~"AFrok"#1XDX PNs梵jq^ȤoM)G'9 4>HuXNSD&G(s>8{ݎ(2ɊHɵo-2 OVU?3oX-,A QfѺQ Np>(HckF.N)"йf$ ۓ3u`TGNX$q6eIg7݉Z]B+0#WoZ6 o0mc--E2d ֘jjFXŃ-J.C7ˎq׍,ex"M=霹hai:8 r7񦉄4 U,zغ.Ɵ K7)`dmV"*v)`6EN0 1*\ Y6^aѕLV={ R>@>P 46i0:Mp_*{PPX:n ²X&eSMbCfhh|oATg-ƞ6v5;e&"2lPzP̕yfix @e$C qׂ:A?=Sfdzc^!@6uUc`kd."oECVWI&zDo43i& UWuc+ p3dGA=gCoQ? L/bPcAv:hJĢ"6KNu)қ, [mI&4'G+?浚Pdpc^Vp3M?nzo=w]iEPVpE7\%]d|k5h$sڹlFp_^ek ZOah)ܦ8GA=~ _5bި&ђp+\8Po"N%\:hRd*4 j4r89GK7+AF/ y&(\l'2v]9%-թ7\r@QBư2K5Ϳ!H=^˥W F->)o5C=yq򟂨cLqjXa=Cm*A)Âg@-Sj*䁒yAToImDێ~3o (IwA)͒ |H0Ww?Yɂnj"68ʁzjpSJ֍Ee=OW7 P.~Yn.ȲgJE6A5FBE_aY:_e E(E44 P?pU4QK *)ATХ3J=0Lo74ovu6_9* ֗lz,0-ailR[d\Jn3G ,T ׊ץZV^:ZF^d°W_Lꒋu|ii:E‡(&M?~UUdiT/ѩL.Ud?Ėzs̃KEԳ*|'^JTS]Ub':iAǻ+'zاWڊ+7:DIAlZ0ߨ Y/!SJx*sSL ]:ƽ*u#Γyqi_Kd{-zô^&W wh5I KNT(gu0e&<сԏTG; =1Ǵ6tQ6'H;N3ljcs$㌆[Lz4;JC:ѴJ=GA0uʆm:] 2 ˏ .ERJh@R e彆Tre˕~=]-P.M˪kn`I᫚'54n^(qKj]ɶg A/V*V n^յ_`n]n*.1D0A/g\Fht @QkUog錶/Mzvo; ڥ1%NV}5n@ {Mn~yd|,zLmu0%غ+owUPexH PvxNn HTd'z}% V8n Na ʸl%ե+gtŨ.ycpʯ 3<60eȲ&dέwt?b+>5C9x"ӮYT(} r!.kCh"ban)}*K]a{r[z%-Ea\"q:b!4GcnkL%vН@EvFO.z&WCJM7\-"$qL8=ٕ(Ե_-u!OSF2R 3E+Zn@[q2du]-r}{K^?UVt:]fmR?G/ERl?Kelˢ:ס^.E/EOhhU(Ujv)W$v)YB9/i{?R 7M/Î|ٝy=*y=[mR%݉v]DhN`Cj+7I[ew~eW-b+QrjؕyO~~FNaWDϛ{6FhT=uô0 ͪ:9:9 `d 60V nng! v<Yz+ Ɠ[Xl9B@;+lH2&|a[ʑw<^!VVTېگWȻջӭ,^ȁwww? Ǚ7yzzzNڻnȻջջջ[Mrj [z"VVV_ww]>ϛ@k?CekVhuWP#׭Z9r Q7馯z虉@^ 2g>~+?|S;%j IG5~2KYd #f,{SBȴ9729xʦ\dvU1n){jX^箔 5[!v q^tX'[CRL'Aǟg_W7ڑtŘS)!D\HUOB2b9l- N!`1Z 2B-1]c7SlE3=i+N=lU<2"VmeZlom]H`,U!):N\A}?E6`3y82C"u57SC?\?GןCvࡍ,r Ij]҅J !f/:r #W/t=`&")t*H?8raExhǜB$ґ1cΰxä7!f:#|#bƶu\/nwi3d?\!#5X.^n #{wD"@dȟr΅YZbXqm OXr!F-~شz#ҲF˘c #~YA# GI ގ z !,`Ұ7S γ+:fALgVYXG?""DŊpVWE.-7 GaCէ.[(yX+t4=YWV!PӲZU =8>KVxP!H܃ZW{;=jÀbԿK_D9, _?dO듗id{$,>MBQcF3// Gڛzʆ=Y#Ǔ WR5jṮ׬82g{A L2ތ/iQv#6oOuY$>D5h%J`G+V,FP& T}Űqp-r:T+1V*O }M'lP~N?aP^ &c*K;8EʠԴ@!O {5=Ya Yfʢ T(^etD- jHxէ P@J݊$z?~_Q7ϽGN#:+3!=ѧUk@;cZK_nB(Spݏɜɲ۹=~j51յ곏['nS3qpA>5?D=@c{Kse?'9apѝlj%*{4lRTdq#n{؈2+j}Avs90dl{a㺂܎v,?nxV$Cn]qB7gw<4تDt KӞ jnoy~(Bہ?vj ~L6.ww\ؿpO@!ɧSnc瑈uoC6fܷC&$Ifpe{"E!1\Rޞ,u +x! ׿Z: YʇΦoE(doY f1m73+侈Xo!cc> i^B[eT UIoU`hr#I]{l1[UW緐O%TW~` uQ^|2(?nAO_രZgpYs>&bz]z'킄K.gCՅ7M@p ~*u&! E(&B~o:*_v|/cH8{} 俴`x߸r\"n/ߗ@K p;K }%]B$&2Y={6FssY?\?l):"2W"ׯ~ nQbt%Yzo',9  ծ SH.oN3 9>V:r" v8[Ɔ͟=bda@2qGc$pazscdK sqhA! Fyh%  Y&.Nd'<³ e$-*㛐N+AKa|[Fײ@39B$~cEW/L"26#:z\Ԉ RuϫLk^ss~5sY\k1ty6ӆ߅֓kXH6ki,q/@1ɁOfg[v{:ݶP '?!fsܟɖeOq<ɴ:t>wS|1v~..W\=8%P#Q"##-5<qGCd^BGtxّ;W-޿I{/?q<)_1{19M^ruXbtUYZXy"=^^ƗZJ_v:x]O!2>! ..."|dόXϯBz."Z'qC~?s!۞1%odX{B|#"cQ?0`Q)U} E' x#? b ןwOX{·NJ̈#9"׏O-E/|TT2涇׹093pZs"g}o_-,7BS^59 }Qz Xc"/=2%{>T ^zXR=9RģZ9T(6قg+N^z-Dǣa@jW /PdsBx?<[TsBF+K&a?~-&45of_3sbz]EPt"0G5?4z>\E3<_9.T|Siݑa`8v_)LۋvL!> }.Ȯ@-bJ'*1Z>"NہN0OuZ<9&pTDž! "I[A`r SH-_1C2"O IA[-"Kb̐|.p|Hc { %x,wbG{/kO0z-<:}=vF3һ 7 )F󾑟aL{K?;b1(aL{KK E_z= / gb `qр$(E^`*A)K4˰\Kj!HZ_< O ~8%{DzX'Kj9M`*7/SQOBoH(Zzê$"Ύs4 g+lz9OXP=Yg̈́9Q/1xߪ7ȿs&}5"{)Κ +kґvr :k"u >ߑY]D~/C:T=/I"(H!Bi*4}ȱKኖ_o_/d~|3$hR7-&/27e*ZszhW 8K >3 %.vGWo y`uwr*5 EVkXRXte|JT!j! A$?ml4U|7Aa s7U|7"Z24i5}":]:)LKj2FiC ]:miK tTD!ZFQ$4qc&Jگ5E4f xLhFWtqI;ΖêU"Ԣ]vi e͓Ȳ_5ܠ57D)yw,HHA1:4;PPTxM0KŇaU=wyJd겏6t+L̂4BݐFä2GM$Tif:/5d&xKD"z8X< f[uO ڽViz$hU DObWVYN_ ݎi[Gt;ϠozmG>~LGPIT⺨RLpZ81^bB$%k(|ȶcCglBd:[d} Ǝ?Rm>. H"Fa -*Ȳ>&3 Ho #Xj|Tn٧3.()B}3la1|0E)Ba SKY5d.@^b~,F+RC (_4 3+"ZFcQ~J5]'#f Q~xdfCүP)Rg,q(ZeN‘+bƓïD#PpqJ>(5x&Hj5\́P|I$::ZOW1Ц\ R,$,B6DUm8)F36YcؑAk|<˲r\ˁ#4Oڈėl2S7d~ ݧ[8 GH|EA1F(Y'MѬSU7d)"Fju g1ZyG~3pK֤+jZ̋bd _aSSfg}\h|5cAsH fOMY\e xגqW_-Rx y r̢JhJÞqӞ=[L.w"JA )j7UC9Y5uYD c!b v8'Y@f~2n KAie4LuQޏeJ*0iQ~J=S_R[ W3D`z<((1Ec v–{m^/CfoW5?a2n儢/;O7Fm?Sx%&6ߓ J:L5wBNx 4Ib92H'M$Ȣ$JPN$z`%h'A N^LIY4*uV 2^| JD |0:NUB-,*%K'*~S7 5GZg-_Pt^WQÙznpVxx"c:xw+W),) {xhM;ʣήWfE藗]HVfWUcMT4G4Bd$8NVxƪ'#/,̊9Ld̰Zh4B)У{:F/Q8 QDh4Y~$y_rmY£ew7v%HW HϖK4R'{lh᭦ΓKFϸ`ϣpsCHIACk;4HVc<[9AάIGY$l+}x#*GORJa[F$+ݟ@l̓ }I,sg \L">=17S c zWt bO8~{AA_+fE o!_wqc ,8|Q{; h}NgIY-75.7C̶L$w<UA5-c#1<︷"-:?$]쪢nʯ,x\O=Dz"9}.xYu4xWv xՇbc Ÿ>%wy WoMsFόNAL EpOs|0WO Eݮ1{_ Uio $4f%OZ%bSCbrE_SĦ^L9&w W\7)¶DJtreX?vR;tnN#%\|F~lߓuAH%>hp(F!6N'n(zM4:}\+ c6AgjnU#g8pe[ z>g dmG7e[:P͒35h*޵eܤz&iJ.S_=pWnv-cTyaz#3%KMdyf}bOvcoWn@֦q3 S[qF wF/+&of3m2.Em2 O!K)MMPFLrNL2aX XhثN`E-D(yt{^`+Ӗ)|"m qZ&eY_qGeQyF=,"F>, ![:?, QsbYz݁$=@8F'l?FiA$?=u`OD&?wY3rjUn M彸1)Mܭc,!cDHt0Gl4#]ӃY϶kd:$|f_!lΟkdR⯐el#UƺEGD" Y֍@En-0b6*{^!WޭޭޭޭޭޭޭZ-u\rꇭ_ZȻջջջջջ߼JBqDޭޭޭ1[ _riq+] V귾 iռ.~[%B&jY}Օ4jgOO\4S4kŏE>L :'}Hki'2Kk~gob;Bcϭv|FunjN<ڑ|q?~hfOꇾ[VV?D zۍnjDi6 Y=~ODVl?FH=>rѮ)՟.k+.onL!mZXGdN|NўeC#bÙ#gENbRU<"… FM7!C3BfGB+=ߺ!m1V#ZHNyy93m?+1~Z<ƨ}_#`M?KviA[h{MR/O Ox!3q#K:?rC '%&!?ў\Ŕ`șу5;pcZ"MV4Ȇ-p/Bn+pz r/!x)6|5~'k/5+,{9l1Q%|]}Z-^ p6^@ fVJI bUL^\ /* 6$\/VTzj?Xug%{ (^j;UEyn^݊ o߻٪m#w"b{71stos|}_׭ohrϝ̻aW{Sd]hr_!| BD3Sl͇7)Uɘ'֓Kz.&vmж$y:EHQsǬpJd}CRυMs!ێ! ˬv(?Ԥ}ݡZ:]$`7pmG~^p{ {1wOCKPO*xR #jZ0B׹:yMk 6 9lChZEjTzy _MnP^3{=}1K%rz輹U*7!:=9> O,M;7F? bϨg7F܈5h7#ta7OX|J#ZDxEa@ߏ(OǼHU<ԏ6/1U²o~*O |@T.Wyg p;K*FS"҈'l@<}AYB2D-HAl KI@H&$Fo S$ku>u% 2 ,><*=:_Keyx>xhC.wu`zQf ]+*+k9:ӕrlcX7ۯ X$ZS&8VR 2]?B{j1ldSh2ϣ@tUx Kk낸ƚ&в'*4Ey)en 9fSOgfHӔcQzEJ`ȰdI\W| d2aVaHȏ&uXfHjCc&MBΉ!ErNddR"d2pasƗ  \?Z{KG~a ^}Mljr"֤Gú\ǑGlMVr \Y"D9Y,__gϗ ?t2D;ȿ|y %_/KT迭CvȀfԗFMv6tyb(B$ږڏQi("qykp')Mݼs qIl[x͈toB_..< 6{WwdS&υ9 ,6uR )cK_:VrSYnV@ZO$IІ b`_{^9!SD|wCma& 1e{ol|w.2iG =-+M".1Xd{HnLUY|)^_JHXb6X ^gi,"vC %dtEL CF.x!{O:^xȍםC.½ElG;_/OX7E,_py#ofZg(B3Pj=xC~([@ym[Ē" _w#?HR^R) *zp{}I`ĥ۾qI(cWpW|R\Zy\RBł(mMzaHvRT< 7Y&PPѬY_p>34S k7u-R0VeѰh2m/+y򫩄Ow+V~i[7|-uPPK(o(x?3%dEXHQ6x|ъ$W /pXD;> ҴdǺ_P:_e\)p'+aG.E2g 0KbϨ`Vv}FB;&PǭNdRKZTUjUe̫V'rJ\jOUOst)qi;3J—KqJВ"y0;c-"I/(k"mƏT00eta\u8S]l3#'9z _5K`- mDc7$2,a'~?ɫ]%gKYgY?I^e>ȇQ+_LBS55Coۨuz:8ɖ\(Es6Ǘo%du6Ło!SS+U |hNz֝FE:{Ͽ9 $0f )j:cS_)yKs1Vdh*aARh6M?eKkf"~K~Ds6cbF=7MS*$UƭH_MMj*4G@c>X-eI 33 NH9A.YF}[QC A"<}M\Ǒ^yp _Z*qLj^hHH$dXyɺ} R孯̪ 1i[f)&>7-@AHSs"XfMEHb xyY{eL!ײ/ H[pXWIHWmynL*5&晔;q̜eU"ͿSG!y@&@f,m1%e>zy_ mKxqTiոռ]NL|4VU ɏ*/yH;69݈ MiITP! p+(U=xw(>K$B;rnC\=hM3Eb#L'NRT?>CYQKYQKrBڪG\ULG titȌ$'sIWm>u5DRWCS4DA UDn.i b1~ȸ@kӐ&ItJv`b ڪWyjՌ@t2XS+!*!]f!C )C 2Z v-̏6U;|"E6Cc֐ tIF!CҮGٮgOSեf֊mrHT$0;TM*.*V wqU!*Qn0i#$agsA"B>GțZW!Y/@^Ӡk'De4^+i4C5o )vm>.~4hi4ܙ"ԶD og=[ ɳK&:?b͔REju0qmZa^ ̯n)ZjhHa NF%;PUW^$>47Į~;3d]]1jsFmW "Rܠ%M<L"^ ]㡏`1is`H#kupx )dMYd`DfФXkS@xV:$8M|cB5im ZWUq uHSٸu_ٸ $*)|v*'(83*$(Wqa7;cArdS ͖ds$)%/"H_57?sHe19-g9Ppm劒J$)i!' 99Q$ ̺}".Jg*D>4d#@;S63״!P#oɖoOEUtH%;"KH%; ;֮P[gC+#*;eQ8xJƱQ1]t6ZMjC)a2˩v4e@duu`gZFD_u#wEG$ʻ#R(e! >vD#CKP#9"&ө%4:GDqO#X~(Q.p6s$ѱsVm,&J:+kNb) m_h8ɒʾ#4K0KVvZ"G9MIGYr%$(yj#Lx" G޲% n+ p^)d@O+?:,(<0'^[r1bKB|x42h.xj4]TҢbi]vÙ.~.?z)å]2#rȧ'3QzEUfNWտj.d~5wJ \L*eY-5 H]N,Gd]-0vD_PݕHD46u+<6dǾ ƗMEF_wbԃGVLprI܅`_Q54It %TpRڅ y2 r;4RF*6|v@Ub{c9Eu<#Pyj"N L= 7dg?d?$ӰPjWZ6 }^WɄ'Z\Z6g6ZiQ^}]ĭH W"\q1/M]i'[k4o2d<4*MKp#5ٿ*Aq̯4܄kSm2]}y$+4|0p@uIg_Q>f7-E72j}e>:e5}1@"NsD?T](g ˴ {YnФᔃۥms Wq2)Z;8B]ýIkǴ м! _>L8FPmEN+Rnu+=J/k=ؘ>#elg4uD5I#;%]D]c w%qt<OiSQգ{ T@y))4+)ݥSSJp?v+\cg?ӄ+k R.4KÈ F (<v\YNPt8ZufNY?I @m}^ud'&Bo?ϐtokC:th&"0IOi#~ n~ nN L0g:6;SB:Cih˓'5xM@Љ]Yި?OYإ\ӿNv:OO:hn~ [ph Cwd8~S*kWW'O 8+Nb\w`u]|en ܼ ^^ yx = ݵQ68~ nJ3;(Sݙ=bY'E~*[\$\ىD3pQ)vpWx8n:(|+"m(|{-PN6u?:{~uww_ N8IS@9LNs=i4wH"ԉfw;/Fg'}S܊bs$;piM31N3;͝ESxGNs#?LNsi4?5 !ƘD@ 5g]<~)%g:$ƆcOG8PD^^l0~&9/0o6VDy&`X$Zje"ĉOx.\ŔCVJZqڞ?y2XP.{%\LKmTR,Oumi1- |3_!z~ɬٌ[:k2ne6={/ Bn;v_O7hiNR7 _^_.V @nZl >/$^ƫԟl!+{,nEnvaZ.i6Wh*eSbķlYH^.it_o7i O㳸4kM|kgZ\#^Fꦃo3OE A8a $s! -6s#@9v ` P9ԉdVLDԃtδLtSm/}L!BK qPAS8ZŽbڔ۹1QJ6C#Bp9C"F#PѬ)x@D_ވdPjwaN-V`s!F?Ata,< +]B(bưY1-xX& XvAy)o h:pطI~b@`W8ӂGdU8邊#qCi/.ˇ ]?w:Cm^$qDZ 3^yJт/]!=7#s.Wl[a p_ɍy4IH5<ФɒmKM9l?y֠sdA 0iqIrqzmK`Mެ,cҤj.],ipz&p}e.8R9Ii^ nND{/=I$Vʎ]++R#}QYKfMY Ҧus^ ZٟKĜ u]Kd,j㒞4H+qIXK;eYC|1G|i̻I;eF{hh?n7kPڗTڊXQw$ٟ jE+rE_~sRߖ%eětqusFʤ]3/-ǥYr]l޼65ɽ/5YɅuYpl,5Ԋ5r}sQzً+x3x+L]Z/5sfMyBY@t9r^ߑ;+A>qW [:O@_~WJuM?wO]sw޻yWiGK;`ݎMٜ>P>#f RRX'dcw3t7=!2(mȾ7Sc_G&$@-oHHߐu +ىyڸ6PPՉ037$OUq$9ٙJ~&N4@08=}H5ɯ`5Î,gZn=DRY2h҆^t42nޣx5{ 51bSx,?&//8#FאHr ܹcȘ?1hذo08 {4{lʣ|$]州'FO6rTU?ӽE mUK֢,'e\eZ⭬8 @DAsx; |t (5XL1W0Vm7L,m/uyǝ4-:YlMTd'80>'oI[:;`xb ,)\RI N7eՙ2]OT%Bw*]>&2fwDö;9›`rtRaWhS [5FvI5C*߻ IlGAYx>n9.\Sוm 6>:4(]дgAþ&d1R٘09M$1[M$БӴ :#zoґpFjZJ7ɷV0⡥Z\%`I BNnj lM3!}1Kfs:DN3ub1ipIzL3BQ~noެRLHο\K4$EOzJ <_Pidw4-ft=4/'[\{-KUpkMǮ-[xj*(NēOmzxM^p%/ik}i ׀Ӑ͡Saŕ<`Vtgg۬W[c֜4d.,SGkٻ!)Ԭxj3Es?P`IYqֈnbI#0-F3}rM{j83Nmjpqn/fii}4PAlP!nn32F4P|h$# 6ǽ(=8 &#C46դ- ZB̎0dvLBw*u F[Ԓ ^jK:O/Yo`7VfD1idxsAc{jCZcJx H#ˁt(zUT $W[)p[hQ"e2=nS/-U| ^upTk4s1y/m%0cTz'DnG)ߋe<QX޳KcXdyUn p֐}ŀx2;QNt۹)"TTnq 0zT1OʷLnȆ! 20}exyi;^ד& j ] 0c4ooͥNbK^C5q |mScH{O7Qjbnw7BaىYUWQH H0zwl!.pgq8id0(uۻF#UVYf-3eo2L$&8l*MT*`Sȑu5 *Ļ }>*T0pVyI𽁶s165+>Fi %ΉS37Q7gp>jƪkD~F9\47 TRϢ9-|Yp.2 Pw!8deWiBd@<֋nxpunţE=51C”74'r"mpt:ϐUgc[3*|q, ;FBɰ{Fre "rׁSn_䰱3#KtI#9^&Fs׽&fŐeT՝,jU;!qá0,qGˬܐ^o!g-%y4d8'oPУ[ ɪ;y`?"!aMՄ;x!MO4)TQ就oeփ'C. )b`aU! $vmUo w S}%x1 ${1ٽ#094#S$ ɻU!rD$ţޑ97V'$>zM*? MU۶9RnkOoD wOODPQ ? I ? =C*>~@C9">cC ?!} ~(vݓxyN0ނ#WzyQ0,?߾`FNA/i0F^݋/× 6!()7,,@pqaD6!Md_W85ǣMءy1M ԋd(o |w V8k rP{`aYNe91Ff+^,/XSz0yq(8`7~g:Kq"ۧ31 KHG`dMɥu\W8gR x&;2 )Ȟ$Т>I8Je>qQVo;?<}&젧s$|D#?˾Bp&1 /4p^<,$n;xr5̫H,40ـø gLqUlIEW8Cpť-둇Mҁ!SU"|ȁ EYwDuU4C~ ɱq% t/qp y/7̸Sr}EAțOR$xڙHvd9숓}=I$]8vI]A(xg)w_۽!< ~㌙O㔩RrkH[ ӎڈ!u1Bm\lRWj]t15pql1wV8w1Xis|jz/{/iiZ^j\qWնKOtDXZ|.΁kX.Ɂ5sE4/XwyM wy~Wbv!j03=17 Em+Ruܮ"~$ZusWWsmlKmZRd~W,x3wK;R×,tYP螽m.*Q= '$X̒8\%-L)=SqT%N6]elukKG|~)pq.- D*ح|@K4H\L|.so%͔-xw. ĉ%Մ(*O%J|X)hgͶkL3|Snfb=~( kj͌t033 #w'Ct <wPӓ%ґyڐm3"ΝZ \dMt!;^f05]Ny*cdlȡAcI Yd @<)x.Yu<8eC)fg 8eC&? -_炃vYvA?0wORZ2 3f{( 44YfxNKdoI_m]Wsx#a.d=ސ8{&Gȍ'0k|jA2IAUؚ=:d'FS@r#rT #E[\ 12a!Rh!^cj䰧dxCy TE dBîJk Q"D03I~GqD9g"$A|!yKL\72R '>I5c"T9Tór##gTDhJd#gTޟա㏱ Lb bʞ=3O/}3R҄f^m2Megu ,MW֓ )O[abͿg4k9JZ߁ʑ3dHz : Ѻ(Ƶ)TYI8OgSI/fU}>z[ٳdDV< vQ!s9r*vv\dwS?GS~^WZ}r1cwNn?.=ܟg)3G=2VwdJo9U?B4o?-&U1xN֝h#YTTUɑGEgM{R}XT`>jbB~ŝ3LGemHS5$Uxd{"hejwHKx)<"o'b۞9(y"ȡD5̀ y ~}PΙQv#+H/`qHtA"&$?̥ed X}dsy1jNbZ^pMNRBdp2Ϡ9aLTx.9h|'ܟlݴdQgʛNX\ Ir P94Ϋ )+RJ@Ph Ҩ 5ELRchndw#gb$'!wTv͐1P~H_! `4b &O~mlR6ďIRlԪmEWوK$<mj5h>oMl`#J3Gp#2 wܞ֬ͷ҄BD¤9^Pp'WYR}:2v肵 `۠7ԙ)ۀ2{`£B䡳b9 I܈ˆdM:.Hm-s.8@"3< 7"(ʨOREJ:7KRyYYNpb%@G8`Srܔkiop(C2eGg-4?z)~g2PeHnf !J5-Mi xCfS)UD*! <ճP6DM'nʟlRAӐc+,SOω9yN×t8 q˼`<ߑNdKg{|9wG-g婖Xp[b{1)y5fhy`r#T8i'ianADO"*Ҩj-Τo2!Q6 ;PمX6 Ր_a =T}sᓍ؎q$9xٳg[؂8"ǁi@1.7/FޏnRVSVz:Vk<L)^N~|ǘ|T"5JrD&X%u ]S0˃&u~2P O}2i/ӤIO}ILi /9a ܄07G 7.Hi%"CQ |MC KŠN-='VWd%g=ISkpMCJvOˎ{:]bbx$&_/4NZY2H iΤ0U\ϔ\ iL8 k٥ ( In ~`.r݆K%XH4A(!{Fe$mFe*Eэ9}$C):B>>4h躑6H!"}3D:5ls+1 ;EٷR`Ab'Lwh6lۜi0#qjt:gѯhiݶ^cx_ݸjs<'6'܃ZIOa?X^PqD L/P\p jMU&:M}Ms^eHNCJ< b^IYCeˑksL[VR`ȴ'`efX@ҦWeZ9IOsҥG]4htETQ ]"Q5Tne߻kՉLC'1&2$12& >N#;ja)y 31mG.{C;P!]R{4fRi#'wfh"dýFYw:II2p!q}v&cpژu^?яdT='lQݾEG'N?vi#)7.Gw/gȑ4rU,GSI^<[i*n&n䋌0M0[G;c1GBqu;C6j+?7<}߽޿x{Ň͟g+ql?>4Ax<:8y2 w?5 #_|řˏ%1QI6vy7O׻{2;80%w5HȾ9&gS /ȾևN7~8x/\~Y|H~xo?~xxW"Po1oaReZZV&A5 er.3+[+UA\O|jۗ1*)?ۿJWf>_~[e吶poo2rŒ#Q\8>?X߽z߀C%_ۿ:}7k-?oGa,OHlJqFUZ7o9S͘rFW{أT}YBĕpVh0^D\ XA f(ëoSpNG<㾜3T-gԦq~W3C> stream xYKo6_! ZQv- l[$XVVR^CJ(Gvvș!|MpJU1I*ߗ.'9`3 ncnM*RӷUbPmG Eo|mG˲C,l0Ü(7{spzW^6d ny>vO˔(Š&,3ty]4O/?3 ouU"DT)S0Tvv ֌s<{TxK@(+huJm`.`*`GF~YDT$q>pF#.PZt1T fđ+W" RgPO0>)K#Qjٴ>R$M^*7ʂprIe@ 3-'!NNpzZ22U"[?d|N9d?BbR.xukU)f/9:;/}V#8 d*l-1=.$nO]Ϸ@MSpXdۨ=*^F7wUSy=ur[$e݃掽d[VX߀-L(;Ddu;w~ns'ENIF$썭1jC "FM&Q̑ۏz쪄˺ |M Hp3 q̶0([73[o}Gʠ1Xc#d&^bUFqYi*GBtI|rҕ;7-ݣi# @FpIW )vP tQms3tn_SYMQ"9qObH̔5V&bl, ã2WMؓ)+F64TkYikVYId~O\q1:VWc5XSp&pi WpGˮ$GkLQpF!Nw$[ƅClȰE-$ MN|?}h|P_^c_>^o({"ѷ~E!,|D t_Q`!/IդgS\4E<-b Q 鿫GoowQ]O>_Kw%Rt!U=bFPy5n__yܤTKB.-r|_Kfǵ P̢qZ ;O&5ؐxp>*H,f8endstream endobj 273 0 obj << /Filter /FlateDecode /Length 1265 >> stream xW[o6~ЇE͉=!:4mكbˉ;[r$9Yw(- ~0)\>G Qbf#};T&DHe"&,IbLT%g ~B;uo~&ud@O*l9M$G1ͣl3+~f:moLعfqkb1$WmF$]F<_te۟ģ)*I頃:L<'D80Mj5!D6*- )vBh<KOXȸYwt Ku0ectW#r/UU(j'^m 8 r󄎄)Ū +[ FY<9We띱A-vv} ؙ/,ci_PpX _@{= 5ja ZyXW,Ԁz{v>jOܸkӆmB}*hm<ʆ%#h5&' Otҩy)ʽu(Q8ֆ Fi -uN&H8U%Y%h"Qq(p= q fA, c+BybQG#ԷE# rI&'P8nscٟ3.ќҬ 7 #jONCQ (Ven{ T1FMv 4S "qIzepKAݝДG*.xQd /sO-=E/u d޶E넟LѽG ?PxcaSOR($;Sn2z` qrPL_:ܢTc{tendstream endobj 274 0 obj << /Filter /FlateDecode /Length 905 >> stream xVߓ6~`&AH<]L$1}e<2Ș?!_rclwCcV}%~׼q[::7 Qnv-ԥT`D.O5GxT;Uz~LHl^N7؅O_!X4.J9i8! %"es,B9k؞007"[J!'0M-%ԏ4 fQ$xB(1p )EG '1O, +$Zx~l1zTz{RZ8S+>a٭z}-*u5A)Rr_HjC2X[U7g|/vm[-"52w7g~׵]#e9JLmbFW_[G ηS Zi?lԗN?}R7< v:W}PW+]jc'xW>?%l-2þJ,%FuUwչf駩Ctͮ Mը }ZuF*~>[jH5J]nGi7:UL|o=R-z8sLU)<ўFMWX !UFYS^$Muu 1YaeF>ny=fylCYL{Ea[SbylZٜJ-Z4؃!=ˆTU.x6TFJɉ‡DQ"mݨ%ԧMFk@^j5RW!1bme(M* 10 4#3(49:_`,G45 gendstream endobj 275 0 obj << /Filter /FlateDecode /Length 2006 >> stream xZ[oѿBy1Ûx)>-Z, ;dYN4,L})Q"ى6`< ɓaD"l Yhɪg#"Zo < mPs&]~|Z"EIԴ qy7LoK,2oXmjJt=Ӭs&1]O3W3h<(I@fWCP M@n_f֊8V $_r>gK"~\/559l*>#.8F4_-(%4.3S!0sd .Z Tţ<ݸ s< fͷOjH,~Jw"~vxbFs uV*mǕ1+s:C3`-`Ǭ5KCP!6E5Jqʔs\̂&Ov ֕m]%-&owŒB`F|jf}إ]vE"K/ bwHZx5bITΐ5_&)#O" 'j3a&K^:21'zͭ;}ǐX\Ng>LLqGó,lN\ Z !d$m/b {#ł`$#8~cƖDc::g&  'tl$M5 {j5Yjj(>7'skVb;.3{r1Nt%Pp$"If [8@}ќ75s{FBi0] y Dic :%b:@u77i]Zj_CupܘͿ=i,Q~1z5\!ລC FyHpAR)XGomQװ$<БIxcp턐Zh9L(źUcD_y~}~tW ~P`C-R2bp6SIoHzIny>݀Y_5s)}n`ŻSLfw4(aL2O'Ǐ1 +8Ma;6.+)L[Gyؕu7?&/L-C2œ9@ͽ/+0!!(ڴrV>MG#/<  > =8Z_R$ ~2~8 48 4+,,Ҁ_QE+ҸcuEa4c2)T~em ˓?gr&so^y/Khe ΦOz}p@,Kz9;>pU>,=.gUAb19"] #ndH+K3{ o QPd> stream xXKoFWHPApNԼ5=P-ӐHwv] Ahw曝fv'_#n-z3XDK#*Jh Q@2%{ΰ?][:{7S"nֺ# fdP$FnO|#__|c V4J'J +±ؿZaw%t K;A `z86.oՓ2HhɈQ`_m-m<}0{J'BP f7Z\Ikcw,i#aP~ 6HiqgcI-6e}WU.쐯تvԃ<[}n.F&^mu_׸S}/*i(S\tQ""d,=OB@ւF2LQbr9H =zlk/[?})Nh9kW˼9wL;i%yx:Qt8R^a2zKKPi(br_?wh帵4t$&X q_zz@[E<&V?=5& -?'<)O_IR28Wȍ]S9 A1sٞ\{4>,KՎ 2r ʵ loM麁~0@]fRRVC(nn hz b5рYj0 L|q|A}΍Tp,)]3ѺX[x^ŽQWs,W҃&wC:wRQs 6cmĄNт̨[gPendstream endobj 277 0 obj << /Filter /FlateDecode /Length 1102 >> stream xXKo8Wa[ Т]m8LIy9 eEք|Ùo>wZ(gs`,Px)Z4L1ɍ}2uzxHNCH>T-iqc#RH.MCOf+fErm$@ R-&e('mشЛv]^Dl- 5)~=@FY"] .@f#sGÓPXaV7)dZmu^<5mwmg4f_vձn]Zh\u?WngW{6]7MٷrP4b%t/`E0td(;x-|zxY6\(K+.Nu}51ud[B$-Wbx\Cp+Wk#z'fT>ÿ49}'ϋtxK+^,\̀7M7B옳W%/׀7ȴila^ "8ع>Tvsl~9ڗAwRUyUumJ#/r~@`2VrUOf@6`( F@b#ԺB y۾vl`9((X7mTaN)l uU3Mz8M 80ࠡܭ9>b Y9)ԷvKZAp|csX类3L!DC K-8 Vi8heh|8Y9:su7-P<PmUz6njw<ɿKLCh~쁀%!N(R)4瓵x0gH'(=CY+BSI.Cf|q7:>endstream endobj 278 0 obj << /Filter /FlateDecode /Length 1113 >> stream xWKoFWȡTP1~F".=%9Pm3Hl;KQ}of?bٿ!BJxy0(U<ƌTJRsYEуNҨa4pg.>_ßL!"cS$Y,IqvwwH0+kWulj7buXrm+Oŷ^Pg%wNWf߸[U^NgA[ԏʭeL 򼉴$wG@  Y+?][Vi~p'ir r*ՠ;(K `#݉ qwuXbzd̼VġB? hSlnbe]I!, z-٘8$WsIWs_ʼH /00O UEh@˺rzjۃBtL%R$^T2YШ pTr_gJBPj;vezm$S~G״'H q c?QGWn&鹮/$? ^e%^qO*, jybD=t8S:dr*Um/[RVmр}_۶^P7>ƼmbujWw$!Iž~Ij ޴ڀ7**N5ִs~)n8gxicH:ʆ9o>i[EdC ;pQ.OCd5Ƽۆ}sʗ#ySEfh؟NJ0xv ՛^E;8Cm3hWF!DX dn_K @"<,  :%FA Mp 9&3"JQ9ȤNE!iQbBwOGW3EZO%!rSEףmtJh 3̳C/\^gLjFU7$N_ #Jl"?T `Dl$3XR`Qx[bg6n,ܶ`{E#_+gh A L'/6ߠ5S9Wendstream endobj 279 0 obj << /Filter /FlateDecode /Length 2145 >> stream xZM _9*W dC܂dlϮ4%y#a5DX e`/ƞ,^7?ܵ?׻}z7- %?|? +Bq)#?~+Sgo<el96߷_D$#y4T诘\7ᰂ4O8}~?rj+[LԱ5 T K|89/qf}qjep8}]|~ jI/Q*-Ο?inaw_/7M&'HJ 2D@LD,1-zRSGRـ:߷ÅquL`€sYAL /i,O\_u)dC= 6xa[]J]Zl`,j+BqUnH4}M΢Ayu6IbA1P DO$AJA% +˷z+n4 n z>zlO>zg1^0-kQ3z3v ;LM*S$oK11EP]U3d˱x`=bP}L4㲴bZ #5@YJLT.&X>?_mV U:qN'x 6Ʋ:ɤbnNd$kWUO݂l媸!%$I]媦LcU%tX*-:Q'V.Uq;-5 &+{bWŔ%C5&|BV~&R_T aHLv쮼 TJb)JN4 B`Cp5 f77 MOzNio!L<{QIu1ϫ4 >"̅";sg2t>^Ȉ+ioU`ʄGȓe4CiXF:Qw<$ T 3Pͦ/ }dv 0Q)]7T}Iw, X 4mCmW>W֧_iQ]<H˭|/9bAyuS..l E>HL2ţDa|_Or^S M7qpjV0A^s%p*#DʈDHH (nԼ{XSSd0-74z :pRr%ϊ%\]gy> }FKzFOi}]H4'( a%Cmzlprfm{F!XHŧ<8KA{3MV}Тm w8ecrCHu\N?KbZ4!{=;|L9NA3k[6| .io|򮠱o&+p3^'Eјy>jNߠ$/15f~kbRn'9ː&ۅ/f[r=}T;8(YHLlR$'Pt 5z7׸uK7aV[s`Z3rA,^"j&Đ"DFEmwŷCĒOŖ]k#* c[dw&)jN?go6endstream endobj 280 0 obj << /Type /XRef /Length 180 /Filter /FlateDecode /DecodeParms << /Columns 5 /Predictor 12 >> /W [ 1 3 1 ] /Info 3 0 R /Root 2 0 R /Size 281 /ID [] >> stream xcb&F~0 $8J[? u(Dp'3l#ѸuleX&{@$;" A$/ɑ &.mr Rx#8H&5 (&πeyA$}&rmX/4Vkr lXX|X|1m q endstream endobj startxref 172442 %%EOF psych/inst/CITATION0000644000176200001440000000054214545315236013523 0ustar liggesusers bibentry( bibtype="Manual", title = "psych: Procedures for Psychological, Psychometric, and Personality Research", author = person("William Revelle"), organization = "Northwestern University", address = "Evanston, Illinois", year = 2024, note = "R package version 2.4.1", url = "https://CRAN.R-project.org/package=psych" ) psych/inst/NEWS.Rd0000644000176200001440000057361114551374146013447 0ustar liggesusers\name{NEWS} \title{News for Package 'psych'} \section{Changes in psych version 2.4.1 (2024-01-15)}{ \subsection{Introduction}{ \itemize{ \item The psych package includes functions and data sets to do classic and modern psychometrics and to analyze personality and experimental psychological data sets. The psych package has been developed as a supplement to courses in research methods in psychology, personality research, and graduate level psychometric theory courses. The functions are a supplement to the text (in progress): An introduction to psychometric theory with applications in R. The functions are also written to further research in the Personality-Motivation-Cognition Laboratory at Northwestern University. \item Additional functions are added sporadically. \item This news file reports changes that have been made as the package has been developed. \item To report bugs, send email to \url{mailto:revelle@northwestern.edu} using bug.report. \item Version 2.4.2 is the development release of the psych package. It is available as a source file for Macs or PCs in the repository at \url{https://personality-project.org/r/}. The released version on CRAN is 2.4.1 (Added January 15, 2024). The second digit reflects the year (i.e., 2024), the third set the month (i.e., 2.3.12 was released in December of 2023 the last two digits of development versions reflect either an minor change or the day of any modifications, e.g. 1.8.3.3 was the third attempt to get 1.8.3 released. 1.7.8 was released in August, 2017. \item To install this development version, use the command: install.packages("psych", repos="https://personality-project.org/r", type="source"). Remember to restart R and library(psych) to make the new version active. It is also useful to check the date of the development package, as I will use the same version number but change the dates (e.g., packageDate("psych")). The date of the CRAN version reflects when it was submitted. } } } \section{Still to do version 2.4.1 (e.g., a wish list)}{ \itemize{ \item Suggestions are welcome, but the current list includes the following (and has included for a long time, so lets be patient): \item Add confirmatory clustering to ICLUST \item Get cluster scores in ICLUST -- analogous to factor scores (requested by Ben Shalet) \item Add the ability to create multiple groups in sim.irt functions \item Add omega factor extension figure option (requested by Sylia Wilson) \item Add option to do subject density and item density plot to IRT plot. \item add the ability to scale radar plots of raw data from min to max, and add a scale to radar and spider plots (e.g. circular histograms) \item add the ability to add labels to lavaan.diagram -- in some sense, this is already there in lavaan. \item To help those who want to simulate a bifactor model, make it explicit. \item Check the bias in bootstrap resampling using cor.ci \item add symmetric=TRUE to eigen calls, think about only.values=TRUE for some cases \item Add an analysis of DIF to the irt functions (requested by David Condon) \item Add some power functions. \item Add CIs for means and skews as an option (for describe as well as describeBy) Requested by Aaron Wichman -- but I already report se so that ci for mean is trivial for user to find. \item Add a function for the Meng, Rosenthal, Rubin tests for multiple comparisons of correlations. \item Add complexity to omega solution \item Add statistic to fa to report max fa. (partly done with fm = minrank) \item Parallize iterations for confidence intervals in omega \item Probably should drop much of the stats when doing iterations with fa or omega \item Find unbiased cis when doing bootstrap \item Allow the choice between regression vs. correlation in esem between X and Y sets. \item Improve documentation for esem and cosinor \item Add interbattery diagram to the structure diagram function. \item Add Krippendorf's alpha? \item Add the ability to weight factor residuals to allow WLMS \item Add the Hull method for number of factors \item Add an option for the correlation type in bestScales (requested by Lorien Elleman). \item Change help file examples so they are cleaner for people who example(fn) \item Make the main functions more "tidy" in terms of their output \item Add testthat functionality - finally started 10/15/23 Making slow progress \item Add parse capabilities to error.bars, error.dots, spider, etc. so that grouping variables can be specified in formula mode. Partially done (not yet for spider) \item Possible bug in cohen.kappa confidence intervals (reported by Marco Fornili) \item Need to add documentation to mediate/moderate/setCor for what dfs are used and why. \item Add an n.iter option to pca (requested by Michael Wood, but I am not sure why) \item There is a bug in bestItems for the case of no iterations and dictionary = something \item Fix bug in fa.lookup for omega g values that are negative (but when does this happen?) \item Fix bug in bestScales that does not report items if not iterating \item Adjust stats in fa.extend to report the raw and then the extended values \item Improve documentation for extension diagram to clarify that it works with fa.results \item Add just check variables specified for being numeric (instead of entire data.frame) (suggested by Fransisco Wilheim) \item Pretty up the output from mediate (perhaps make a mediate2latex function) \item Improve the print function for bassAckwards \item Speed up fa.parallel for large number of variables (requested by Matt Zefferman) \item Add df2word to parallel df2latex for those poor people who need to use proprietary word processors to submit journal articles. } \subsection{Still to do}{ \itemize{ \item Change examples to dontrun rather than commented out ## mostly done \item Add the ability to get factor scores for higher order factors (requested by Brandon Vaughn). Partly done in omega, but there are problems associated with the entire problem of factor scores. \item Fix a bug in alpha when finding average R (need to use cor rather than cov2cor when treating missing data) \item Add weight option to statsBy (but the question is how to handle missing data) \item Adjust the se for correlations (in bestScales) to take into account the pairwise count, rather than the n.obs \item Improve documentation for thurstone. Would help if we had an example of scaling. \item scoreIrt.2pl needs to be able to adjust for group differences correctly \item Possible problem in pooled correlations for statsBy \item Think about improvements in itemLookup to show means if desired \item Fix bug in polychoric where it doesn't handle empty cells very well. (reported by Björn Büdenbender). Or at least improve the documentation about why this is a problem. \item Fix bug in mediate for multiple dependent variables (reported by Martin Zeschke) \item scoreItem should find the mean item correlation with na.rm=TRUE (Liz D) \item create a scoreFastBy \item improve the help menu for sim.multi and return the latent values \item think about linking person scores to item difficulty scores in IRT \item Left justify content in lookupItems (not sure how to do this) \item Consider using the Jennrich approach to standard errors of rotated loadings \item Still need to fix mediate to handle more than 1 DVs \item Possible problem with the use of try in several functions. \item Need to fix omega so that alpha is correctly calculated in case of no general factor \item It would be nice to add an option to display confidence intervals of correlations in error.dots \item allow omega style hierarchical structure for x (and y) in esem } } \subsection{Additions}{ \itemize{ \item Added the score option to scoreOverlap so that if given raw data, it will find the scale scores as well as the corrected for overlap statistics. Note that the scale scores are not corrected for overlap. \item Minor tweaks to scoreIrt.2pl to check for bad data (missing cells ). This was leading to the function hanging forever in the case of pairwise counts of 0. \item Added fix.dlpyr (as a local function) to statsBy and describeBy to not choke on Tibbles. Had added this to alpha several years ago. \item Added itemSort to combine item means and item content and then sort them. }} \subsection{Bugs fixed}{ \itemize{ \item changed the number of elements in the unit.Circle in spider/radar to get more precision \item Added a conversion to numeric in mediate to get around a problem with variables that are factors. (Problem reported by Lynn Bueeler ) \item Finally fixed bug in alpha where it finds R from cov2cor instead of directly (reported by Karl Ove Hufthammer ) \item Fixed item.lookup so that it will report item means as well as factor loadings. }} } \section{Changes in psych version 2.3.12 (2023-12-12}{ \subsection{Additions}{ \itemize{ \item Started adding test.that calls to various functions (requested by Coen Bernards) Slow but useful progress. \item Seriously improved lavaan.diagram (and diagram) to be able to draw SEM diagrams correctly. \item Modify lavaan.diagram so that it handles cfa output better (done) \item The sim function was modified to allow for items as an option. This matches the parameters in sim.structure. \item Changed the splitHalf function to do all comparisons, not just 1/2 (This was leading to a problem with small and odd nvar.) Discovered by Eunseong Cho. \item Added the ability to specify type of correlation to cor2 and lowerCor \item Renamed corr.test to corTest \item Modified fa.extend so that it reports factor scores in a more reasonable manner. \item Added part correlation (aka semi.partial) as an option to partial.r. \item Added a discrete option to alpha to not complain about lots of categories. \item Finally added the X and Y loadings for the canonical correlations in lmCor. \item Added cancorDiagram to draw these canonical correlations. \item Added canonical coeffiecients to lmCor (done 12/12/23) \item Find canonical loadings in lmCor - done 12/12/23 } } \subsection{Bugs Fixed}{ \itemize{ \item Fixed reliability to find omega_h for 3 variables by defaulting to 1 factor \item Fixed sim.structure so that it would treat low and high option correctly again. (Reported by Kubra Atalay Kabasakal). This option seems to have been dropped by accident in 2021. \item The call in diagram to call lavaan.diagram now works (reported by Zhang Yaohua) \item Fixed predict.psych for the lm case to allow correct prediction of unstandardized data } } } \section{Changes in psych version 2.3.9 (2023-09-22)}{ \subsection{Additions}{ \itemize{ \item Added the n.obs parameter to corPlot to allow showing stars for correlation matrices \item Added the ability to label higher order factors \item added the coarse=TRUE option to factor.stats to solve obscure problem in simulating sapa data. \item Fixed error.bars.by to allow drawing of v.labels for both by.var=TRUE and FALSE \item Revised and improved unidim and the associated documentation. Unidim provides three estimates of the unidimensionality of a scale. } } \subsection{Bugs Fixed}{ \itemize{ \item Fixed a bug in fa.diagram so that it will properly label the extension variables. This was actually a problem in dia.arrow for the draw=TRUE option \item Fixed a bug (reported by Nadine Kaul and Maria Bobrowski) in corPlot where my ability to have slanted x lables messed up the xlas=2 paramter. \item Also fixed a bug in corTest that was giving stars above the diagonal for the case of just lower diagonal output (also reported by Nadine Kaul and Maria Bobrowski ) \item Fixed bug in fa.multi.diagram to report variable labels correctly. (reported by Rémi Thériault) \item Fixed a very obscure bug in describe where it would not return variable labels for vectors if asking for quantiles. \item Fixed violin/violinBy to draw allow rain drops for ungrouped data } } } \section{Changes in psych version 2.3.6 (2023-06-21)}{ \subsection{Additions}{ \itemize{ \item Added a threshold parameter to sim.hierarchical and sim.item to allow for conversion to binary data. \item Added a n.small parameter to sim.minor to limit the number of small factors. \item Added mean imputation to omega (for scoring scales) \item Replaced solve(r,f ) with Pinv(r) %*% f in fa.stats to get around a problem of useless solutions in omega and fa \item Added type option to mlPLot to allow for more control \item Changed setCor.diagram to lmDiagram and added the ability to plot lm output. \item Minor tweak to extension.diagram to allow the causal arrows to come from rather than to the extension variables. \item Added rain as an option to violin to show rain cloud plots \item Add the ability to slant labels in plots \item Added a sort option to corPlot (sorts by cluster order) \item Added RMSEA function to allow easy calculation of RMSEA given chisq, dof, n.obs (requested by requested by Gabe Orona) \item Added RMSEA, but not yet TFI, etc. to esem (equested by Gabe Orona several years ago. \item Cleaned up the various sim functions so that they are better documented. \item Allow for different rotation options in X and Y sets in esem \item Added the ability to return the true scores for subjects when simulating structures. This will help fitting reliability models This is now done for sim and sim.minor, sim.hierarchical, others are in process. \item Changed a number of local functions in the form print.x to print_x format to comply with overly aggressive R development checks. } } \subsection{Bugs Fixed}{ \itemize{ \item Fixed a very rare case in describeBy for the case of two categorical variables measuring exactly the same thing. Added char2numeric before the by(describe,group) call. \item Fixed a bug in bestScales for the case of no items for replications (still in progress). \item Fixed lmCor so that the degrees of freedom for the case of use="complete" is correct. \item Dropped a redundant check in factor.scores for non-positive definite matrices. Pointed out by Chandler McClellan. \item scoreIrt does not work unless keys are specified for nf > 1 (reported by Andrew Olney) -- not sure of the problem. What ever it is, I think I fixed it. \item Fixed a typo in the mlr rd file for the RkR equation. (reported by Sebastian Castro-Alvarez) } } } \section{Changes in psych version 2.3.3 (2023-03-18)}{ \subsection{Additions}{ \itemize{ \item Modified alpha so that it returns a keys list rather than a vector \item Added the smooth option to fa, fa.stats and fa.extension. This by default had been true. Now allows users to specify that they don't want to smooth improper correlation matrices. Affects just the fa.stats output. \item Improved the documentation for irt.fa, and fa.extend \item Changed the verbiage in omega to reflect that we are finding sums of squared loadings rather than eigenvalues (Thanks to Niels Waller for this point.) \item Added the percentage of items with their highest correlation on the keyed scale (as an index of scale quality) Requested by Asghar Minaei. \item Added the MIMS and MIMT matrices to scoreOverlap and to scoreItems These matrices reflect the average item correlation within and between scales (MIMS) and the average item x scale correlation within and between scales (MIMT). Suggested by Asghar Minaei. \item Added options to the correlations in cortest and also improved the documentation. \item Added alpha2r to convert alpha to average r (a trivial, one liner, but convenient sometimes). \item Added crossValidationBoot to do multiple derivations / cross validations \item Modified factorScore so that it will give scores even in the case of missing data. Scores are found from the mean of the non-missing items on a factor. To just get scores for non-missing subjects, specify impute="none". \item Added faReg /faRegression to make use of fa.extend to find the regressions with dependent variables in a more understandable output. Modified extend.diagram to draw these as well. Although developed to use fa.extend, faReg will also do regressions based upon factor scores from the raw data. \item Added SAPAfy to "sapafy" data matrices. (a trivial but useful helper function). \item Improved examples for iclust. \item Following a suggestion from Ethan Fosse, I have reversed the direction of the arrow in ICLUST to go from the items to the clusters. This is now the default but the old style can still be drawn using iclust.diagram with the bottomup parameter set to FALSE. \item Minor tweaks to corPlot allowing the fixing of trailing zeros and the number of digits to display. \item Improved corPlotUpperLowerCi to find correlations (and Cis) if given raw data. This improves usability, because the naive user would expect that behavior. \item Added the show option to lowerCor so that it will just find but not display the correlations. Defaults to TRUE. \item Added corCi and corr.test output to the objects that error.dots can display \item Added eigenCi to find and plot the confidence intervals of eigen values. \item Added more timings and changed the default size for bigCor \item Added lmCor as an alias to setCor. (lmCor is much more intuitive as it is lm for correlation matrices). \item Fixed statsBy to handle the case of NULL groups. (particularly for the cors=TRUE option). I think I have done this. } } \subsection{Bugs Fixed}{ \itemize{ \item Fixed scoreItems so that the delete=TRUE option actually works. (Based upon a report by Christopher Wiese ). Also improved the documentation about why this is needed. \item Fixed a typo in fa with the fm=alpha method. Reported by Philipp Doebler. \item Finally fixed a bug in bestItems so that it works with raw data. \item Fixed a very minor bug in polychoric that was throwing a warning and now is breaking. Results do not change. (Reported by Goran Kardum) \item Fixed cor.smooth so that if it is given raw data, it will first find the correlations. \item Fixed minor typo in fa.Rd (thanks to Ian Buller for pointing this out). \item Fixed crossValidation to work with subsets of variables. \item Fixed autoR to handle case of single variable when grouping. Reported by Alex Smith and thanks to various suggestions on stack overflow. \item Fixed scoreItems so that it will give names to unlabeled data and keys (was breaking GAabbreviate) \item Fixed crossValidationBoot for case of multiple DVs \item Fixed reliability for the case of single item scales \item Minor improvements to cohen.d to allow easier use of dictionary and sorting \item Fixed cor.smooth to handle covariances matrices (reported by Douglas Otto as a problem in fa) \item Finally fixed bug in bestScales so it will handle correlation matrices \item Fixed bestScales so it will produce a keys list ordered by correlation for correlation input. \item Fixed a bug in cohen.kappa for weighted Kappa. Reported by Ju, Yu-Jeng. \item Fix a bug in bigCor that was not working for polychoric correlations \item changed the use of the abline(h=1) in fa.parallel to draw it for pc not for fa. Pointed out by George Harrison. } } } \section{Changes in psych version 2.2.12 (2022-12-14)}{ \subsection{Additions}{ \itemize{ \item Modified alpha so that it returns a keys list rather than a vector \item Added the smooth option to fa, fa.stats and fa.extension. This by default had been true. Now allows users to specify that they don't want to smooth improper correlation matrices. Affects just the fa.stats output. \item Improved the documentation for irt.fa, and fa.extend \item Change the verbiage in omega to reflect that we are finding sums of squared loadings rather than eigenvalues (Thanks to Niels Waller for this point.) \item Added the percentage of items with their highest correlation on the keyed scale (as an index of scale quality) Requested by Asghar Minaei. \item Added the MIMS and MIMT matrices to scoreOverlap and to scoreItems These matrices reflect the average item correlation within and between scales (MIMS) and the average item x scale correlation within and between scales (MIMT). Suggested by Asghar Minaei. \item Added options to the correlations in cortest and also improved the documentation. \item Added crossValidationBoot to do multiple derivations / cross validations \item Modified factorScore so that it will give scores even in the case of missing data. Scores are found from the mean of the non-missing items on a factor. To just get scores for non-missing subjects, specify impute="none" \item Added faReg /faRegression to make use of fa.extend to find the regressions with dependent variables in a more understandable output. Modified extend.diagram to draw these as well. Although developed to use fa.extend, faReg will also do regressions based upon factor scores from the raw data. \item Added SAPAfy to "sapafy" data matrices. (a trivial but useful helper function) \item Added minor factors as an option to sim.congeneric. \item Improveds the documentation for sim.minor to reflect the MacCallum paper. Improved the code so that it screens for impossible models. \item Added/improved documentation for iclust. \item Added the order object to iclust to allow for sorting matrices by cluster structure. \item Added a sqrt option to VSS.scree and fa.parallel output (thanks to Marco Del Giudice for the suggestion) \item Added a coefficients object for setCor and mediate } } \subsection{Bugs Fixed}{ \itemize{ \item Fixed scoreItems so that the delete=TRUE option actually works. (Based upon a report by Christopher Wiese ) Also improved the documentation about why this is needed. \item Fixed a typo in fa with the fm=alpha method. Reported by Philipp Doebler. \item Finally fixed a bug in bestItems so that it works with raw data. \item Fixed a very minor bug in polychoric that was throwing a warning and now is breaking. Results do not change. (Reported by Goran Kardum) \item Fixed cor.smooth so that if it is given raw data, it will first find the correlations. \item Fixed minor typo in fa.Rd (thansks to Ian Buller for pointing this out). \item Fixed crossValidation to work with subsets of variables. \item Fixed autoR to handle case of single variable when grouping. Reported by Alex Smith and thanks to various suggestions on stack overflow. \item Fixed scoreItems so that it will give names to unlabeled data and keys (was breaking GAabbreviate) \item Added the valid object to the pca function (to match the help file.) Reported by Tomosumi Haitani) } } } \section{Changes in psych version 2.2.5 (2022-05-08)}{ \subsection{Additions}{ \itemize{ \item Fixed violin so colnames are not necessary \item reliability function returns alpha from split half rather than from omega \item Added the validityItem and CD.validity functions \item Minor fixes to sim.minor to supply names for theta, minor clarification to sim.minor documentation. \item rebuilt using R CMD build psych --compact-vignettes=both to compact vignettes } } \subsection{Bugs Fixed}{ \itemize{ \item minor correction to pairs.panels to work in 4.2.0 without warning \item Fixed splitHalf so it will work even in case of n.obs < n.var \item Fixed error.dots to properly handle dictionary for some cases (reported by Matt Bales) } } } \section{Changes in psych version 2.2.3 (2022-03-15)}{ \subsection{Additions}{ \itemize{ \item Added ability to do N random restarts in rotations in fa and added the faRotations function to do this more easily \item Added a hyperplane count statistic to the output of fa (following Niels Waller), especially if we do multiple rotations. \item Added congruence (aka cohen.profile) to find profile similarities and general congrucences. \item Added scoreBy to find scores for multiple subjects from statsBy (requested by Lizz Dworak) \item Added variance accounted for each variable in setCor (requested by Pierre-Emmanueal Encinar) \item Added a citation to Grieder (2021) for a comparison of fa to SPSS \item Added a comment in fa help that oblimin as implemented is a quartimin solution with delta =0 \item Added formula input to partial.r \item Added m2d to find Cohen.d from two means and 2 sigmas (and 2 ns if desired) \item Added bigCor and pairwiseCountBig to speed up correlations for large data sets. This takes advantage of the multiple cores available on Macs. bigCor is also useful for very large data sets even without multiple cores. \item Modified setCor to just zero center the independent variables when taking product terms. (Suggested by Keith Widaman) \item Added the confidence intervals for hedges g in cohen.d \item Changing the name of categorical variables to *variable leads to problems. Added the flag option to not change the names in char2numeric } } \subsection{Bugs Fixed}{ \itemize{ \item Fixed sim.multi to generate more than two factors (reported by Lizz Dworak) \item Fixed reliability so that it doesn't give hopeless split halfs -- was incorrectly keying items. \item Fixed p value in mardia (don't multiply by 2!). Reported by Malen Roberts \item Fixed the estimate of VIF for raw and unstandardized data \item Fixed reliability so that it will work from correlations as well as raw data \item Fixed bestScales to work from correlation matrices \item Fixed a bug in describe for single variables from data frames (reported by Mike Linacre) \item Fixed bug in cohen d reported by David Kellen (using wrong group size calculation) \item Fixed bug in error.bars (and error.bar.by) to match the colors of the lines with the colors of the errors (reported by Lizz Dworak) \item Fixed bug in df of ci for alpha.ci (reported by Mijke Rhemtulla.) \item Fixed (again) the correct p values for ICC (reported by Mijke Rhemtulla.) } } } \section{Changes in psych version 2.1.9 (2021-09-21)}{ \subsection{Additions}{ \itemize{ \item Modified the call to splitHalf from reliability to allow for sampling of all possible splits in the case of n.items > 16. The version released to CRAN (2.1.6) would unfortunately not complain if the n.items was too large and would cheerfully run for ever. \item Also modified the reliablility function so it would not choke in the case of scales of size 1. \item Changed splitHalf to only sample splits if the number of possible splits exceeds 10,000. (That means, if there are more than 16 items in a scale.) This can be adjusted in splitHalf but not in the reliability function. \item Modified error.dots when using dictionaries to pull the column with a name of c("Item","item","Content","content"). It had been limited to "Content". This way you can use a full dictionary and not worry about the extra columns. \item Improved bestScales so it will work with a dictionary more easily \item Two further tweaks to reliability: it can be used on one scale without specifying keys. (Suggested by Tom Olino) and reports the unidimensionality statistic (u) \item Think about adding the test for number of factors function from xx \item Added factor.scores to the fa.extension so that we can do biplots \item Various tweaks to radar to allow for more complicated graphics. \item Added the ability to plot the variance accounted in bassAckward.diagram \item Improved the help page of ICC to explain why the Shrout and Fleiss estimates are the same as the McGraw and Wong estimates. \item Minor modifications to reliability and split half to return scoring keys for the best and worst splits as lists, rather than vectors. \item Fix fa.diagram so that factors are enclosed in ellipses (as they had been). Reported by Mark Greenwood. \item Added the normal=FALSE option to corr.test to allow for more precise estimation of probability values for method="spearman" or method="kendall". (Suggested by James Conigrave) } } \subsection{Bugs Fixed}{ \itemize{ \item Added the ability to sort means correctly in item.lookup. \item Removed the char2numeric in cohen.d (added in version 2.0.12) in response to an error report from Frank Papenmeier. It seems to have been an unneccessary fix to cohen.d. \item Change the d2t call in cohen.d to reflect differences in sample sizes (reported by Emil Kiergagard). Also am now passing the hedges.g value when finding t \item Fixed a slight disagreement between cohen.d and scatterHist for reporting the cohen.d. cohen.d was correct. \item Fixed bassAckward.diagram to draw boxes for components, ellipses for factors. \item divide n/2 in case of equal cell sizes in d.ci #reported by Fred Oswald } } } \section{Changes in psych version 2.1.6 (2021-06-20)}{ \subsection{Additions}{ \itemize{ \item Added addStars to allow significance printout to corr.test (requested by Uthpala Pinto). Now, automatically gives a "stars" object. This has probability "stars" for raw and adusted p values. \item Minor tweak to lowerMat to allow it to print character fields (for stars objects) \item Adjusted the way p values are displayed for mardia (requested by Jhonys de Araugjo) to show out to 10^-300. \item Modified the mediate function to report tests of ab terms against each other (requested by Nancy Collins) \item Added within group density circles to scatterhist. A nice way of showing how M.distances work. Also added Mahalanobis distance calculation to scatterHist. \item Added matrix as class to partial.r \item Added various overlap measures for cohen.d (d2OVL, etc.) \item Added the reliability function to find alpha, both omegas, and split half for a set of scales at once. \item Added plot.reliability to allow for nice graphical displays of these results } } \subsection{Bugs Fixed}{ \itemize{ \item The p and p.adj objects in corr.test were reversed for non-symmetric matrices (reported by wangshun) \item Fixed isCorrelation to handle output from partial.r (reported by Isaac Petersen) \item added unclass(r) to fa for the same problem of partial.rs \item fixed catseyes to not choke if no variance. \item Fixed a problem introduced into fa.diagram where it was not drawing circles for factors (reported by Mark Greenwood. ) \item Fixed a subtle error in bassAckward.diagram where the numbers where not quite alligning with the arrows. \item Finally stopped getting zero length arrow warnings in error.bars \item Fixed the mediate function to correctly report df for the b term (following reports of the problem from Nancy Collins and Amanda Montoya) \item Fixed the printing of non-symmetric correlation p-values for corr.test (reported by Bowen Li) \item Fixed predict.psych for the case of missing=TRUE to find colSums of weights time scores instead of colMeans. (No change in correlations, just the scale sd) } } } \section{Changes in psych version 2.1.3 (2021-03-24)}{ \subsection{Additions}{ \itemize{ \item Added char2numeric feature in lowerCor and cor2 so they will no longer choke when given character information (e.g. sex, education level in sapa data) \item Added flag=TRUE to char2numeric so that it changes the colnames of those variables that have been converted to numeric. Set flag = FALSE to not change colnames \item Minor improvements to error.bars.by \item Followed by a major rewrite of error.bars.by to make the code cleaner. \item Added the data option to cohen.d so that we can just do a subset of the variables \item Added the ability to do multiple DVs for anova of setCor objects. \item Changed the sim (and related functions) to return the underlying theta that generates the data. This allows for tests of adequacy of recovery of the model. \item Added pairwiseSample to allow for faster counting of pairwise frequencies. \item Added pairwiseZero to display those pairs of items with 0 (entries) (Requested by J.D. Haltigan) \item Completely rewrote sim.hierarchical to produce the orthogonal factor scores that produced the data. (Following correspondence with Niels Waller addressing the estimation problem.) \item Added the adjusted probability as an output object in corr.test. (Requested by Alex Weiss). \item Added legend to matPlot and other bells and whistles. \item Modified testRetest so that it automatically sort on time and user id and drop subjects with non-matching tests. (Inspired by some comments from Jacob Lee). \item Reordered spi.keys to match the bassAckward order. Added Big and Little 27 identifiers to the spi.dictionary (in psychTools) \item Added to the fa and smc documentation to reflect using pseudoinverse. \item Fixed statsBy so the grouping variable can be categorical and don't use it when finding the within group correlations. \item Improved testRetest to allow for a different number of subjects in the trial 1 and trial 2 data. Also allows for randomized data. } } \subsection{Bugs Fixed}{ \itemize{ \item Fixed a bug in describeBy to handle more than 2 grouping variables (reported by Nicholas Stafniak) \item Changed the term eigenvalue to sum of squared loadings in omega (in response to a comment by Niels Waller) \item Fixed bug in testRetest (reported by Jacob Lee) about fixed id column } } } \section{Changes in psych version 2.0.12 (2020-12-12)}{ \subsection{Additions}{ \itemize{ \item Released 2.0.9 to CRAN 10/05/20 -- thus 2.0.12 is now the development version \item Added the option to print correlations in lookupItemFromKeys (requested by Seo Young Myaeng) \item Modified isCorrelation to allow for NAs in correlation matrices (requested by Lorien Elleman) \item Improved error.bars and error.bars.by in response to requestes by Lizz Dworak \item Improve violin and violinBy to provide more features including vertical versus horizontal plots, stripcharts of the data points, and error bars. \item Added fa.pooled in response to a query from Asghar Minaei about how to pool various imputation runs. \item make cohen.d handle categorical (character) data (requested by Evan Grandoit) \item Finally got the s.e. to intercept in set cor (requested by Eric Youngstrom) to work. See attempts from version 1.9.12 onwards. \item Added a anova.test for two setCor objects (did this a while ago, not sure when) } } \subsection{Bugs Fixed}{ \itemize{ \item Fixed a bug in setCor where it was not correctly handling the case of std=FALSE, zero = TRUE (reported by Robert Stupinsky) \item Fixed a bug in mediate.diagram so that it properly ignores the intercept (also reported by Robert Stupinsky ) \item fixed a hidden function in thurstone so that it actually works given ranked data \item Fixed a bug in scoreFast for the case of one scale and count.responses=TRUE (reported by Alyssa Counsell) \item Finally fixed a bug in error.bars.by for the case of multiple variables with some missing data \item ptci in corCi should be 1-ptci \item Corrected the spelling of Branscombe in the documentation of Garcia (with thanks to Barbara Toizer) \item Added drop=FALSE to violinby so it can handle single x variables. \item Replace all calls to mixed.cor with mixedCor (responding to a question from Lisandro D Colantonio about mixed.cor being deprecated in a call from statsBy) \item Finally (?) fixed bassAckward.diagram to draw the lower level factors correctly both vertically and horizontally \item Fixed a bug in faBy so it will work with one factor (reported by Kondwani Kajera) \item Fixed print.psych.vss to properly handle NA in BIC and EBIC (as reported by Paulo Sergio Panse Silveira ) \item Finally ? got the error terms in setCor for the intercept and the residuals to match lm! \item Seems to be something wrong in mediate df 1 IV (fixed?) ) } } } \section{Changes in psych version 2.0.9 (2020-9-09)}{ \subsection{Additions}{ \itemize{ \item Added formula input to densityBy \item Added grouping option (and formula input) to scatterHist \item Fixed a minor confusion between main and title in fa.plot \item Add minlength parameter to lowerCor and lowerMat for length of abbreviations } } \subsection{Bugs Fixed}{ \itemize{ \item Fixed a problem with crossValidation where it would not work with a single criterion \item Fix a problem with alpha if the number of categories differed drastically and max was not specified. Reported by Daniel Bradford. \item Fixed a bug in geometric.mean where it was not processing na.rm for the case of a single vector (reported by Jesse Aldersliesten) \item Fixed a bug in mediate where the standard errors of the boot values were wrong (reported by Felicia Zerwas) } } } \section{Changes in psych version 2.0.8 (2020-8-31)}{ \subsection{Additions}{ \itemize{ \item Added legends to densityBy \item Modified unidim to give cleaner output. Added two new fit statistics as well. \item Added cross references to fa.multi with fa and omega. \item Dropped sem as a suggested package. Basically, I was no longer using it, and had replaced the functionality with lavaan. This makes psych a lighter package since sem indirectly requires a number of packages. \item A problem in fa or fa.stats reported by Kyle Husman (something is wrong with promax solutions) was actually a problem in factor collapse in coarse coding. Modified fa.stats so it pads out NAs for those correlations that don't exist \item Continued the addition of formula mode to various descriptive functions (describe, describeBy, densityBy) } } \subsection{Bugs Fixed}{ \itemize{ \item Fixed a bug in iclust.diagram introduced in 2.0.7 (substitute does not work work on vectors) \item Finally fixed a bug in testRetest where I was flipping two of the components of variance (id x time and ID x items) Reported by Miquel Biron=Lattes \item Added a warning when doing omegaSem with one factor (amd made it work -- reported by Erich Studerus) \item Fixed the order of plotting variables in bassAckward.diagram (was plotting them in reverse order) \item Fix scoreItems to think more carefully about what to do if an item has no variance. Score it anyway, just don't use it for item statistics. (raised by Eric Green). It turns out the delete=FALSE option is scoreItems was doing this anyway. \item Found a fixed a bug in alpha where the imputation was not working quite right. } } } \section{Changes in psych version 2.0.7.31 (2020-7-31)}{ \subsection{Additions}{ \itemize{ \item Improved summary and print functions for faBy (since I was working on it anyway) \item Added simCor (alias for sim.correlation) which allows for several types of skewing of the data, as well as dichotomizing it at cut points. } } \subsection{Bugs Fixed}{ \itemize{ \item Modified iclust.diagram and more importantly, multi.arrow to address a problem introduced in version 2.0.7 which although it worked in R.4.0, broke in R.3.6.3. The problem was that the change from 3.6 to 4.0 introduced a change in the way strings are treated as factors. This is set by default to be FALSE in 4.0, but needed to set stringsAsFactors=FALSE when forming data frames when doing multiple graphics. The implication of this is that 2.0.7.31 will work for 3.6 and the version on CRAN (2.0.7) works for R >= 4.0. Note that another problem was introduced in the iclust.diagram function by this fix. (see psych 2.0.8) \item Fixed bug in faBy so that it realizes we have done a statsBy already (reported by Lisandro Colantonio ) } } } \section{Changes in psych version 2.0.7 (2020-7-26)}{ \subsection{Additions}{ \itemize{ \item Added crossValidation to be compatible with setCor and bestItems \item Added the weights option to statsBy (requested by Joshua Tuttle) -- still needs to be worked on for weights within groups \item Minor fix to error.dots to allow more control on the figures and to make sure that we are calling psych::describe \item Now reports the keys used in scoreItems \item Added the spengler data set as an example for mediate \item Improved faLookup to handle bassAckward output \item Improved the print and summary from mediate to include the intercept term. \item Improved bassAckward.diagram to allow labeling of higher level factors and to take advantage of the improvement in diagram. \item Improved several of the diagram functions (e.g. bassAckward, fa.diagram, iclust.diagram etc.) to draw the variable boxes and the path labels as vector calls. This leads to an impressive increase in speed when drawing the figures. \item Improved char2numeric to handle case of characters not stored as factors \item Improved mediate to use multicores (if available) to speed up the bootstrapping for big problems. \item Improved describe to handle case of characters not stored as factors \item Added responseFrequency as alias to response.frequencies and added class psych. \item Added the ability to use formula input for describe (calls describeBy) and describeBy (see the revised help page) \item Added formula input to cohen.d and cohen.d.by (allowing a cleaner call to cohen.d for 1 grouping or 2 grouping variables) \item Added formula input to violinBy, densityBy, histBy. \item Added the ability to lookup which items in a dictionary have a certain content (lookupItems) \item Added the num.cat option to polychoric for those determined users who want more than 8 categories. \item Replaced solve with Pinv in the partial.r function. This allows for inverses of matrices that are not positive semi-definite. \item Commented out several tests for bestScales in order to pass the Debian timing tests on CRAN } } \subsection{Bugs Fixed}{ \itemize{ \item Fixed summary for setCor to properly print betas (coefficients) Error introduced in 1.9.12.31 (reported by Hans Marius Norbom) \item Fixed bug in bassAckward for orthogonal rotations (reported by Eva Billen ) \item Fixed bug in fa.congruence for case of factor loadings provided by user rather than a psych function (reported by Lisa Debruine) \item Changed the way to find the pooled correlation in statsBy by finding the average fisherZ transformed correlation (back transformed to r) rather than just using the average weighted r. \item error.bars (which was calling error.bar.by) was not passing all the parameters to error.bars.by. (Reported by Lorien Elleman). \item errorCircles was not plotting error circles if there were NAs in any of the descriptive statistics\ \item Fixed the equamax call to cfT to include kappa in the call (reported by David Condon and Gerard Saucier) \item Fixed passing parameter to Equamax (reported by Erich Studerus) \item Fixed alpha to correctly report the alpha if dropped values for the case of nvar=2. I was right for the first item, not the second (reported by Thomas Buhl) \item Fixed bug in statsBy that was not returning ci.wg for the case of just two variables (reported by Kate R) \item Fixed typo in spelling of confirmatory of omegaSem \item Fixed a bug in bestScales for the case of very few items (reported by Tim Kaiser) \item Finally fixed a bug in polychoric where it would not properly correct for continuity in the case of different number of response alternatives. (Partly following a suggestion by Ben Mainwaring. ) } } } \section{Changes in psych version 1.9.12.31 (2019-12-31)}{ \subsection{Additions}{ \itemize{ \item Changed the solve(R) to Pinv(R) in cohen.d when finding Mahalonobis D. \item Modified the examples in bestScales to not test while doing CRAN installation, in order to pass the Debian timing constraints. \item Changed smc to us Pinv rather than solve to find the inverse. \item Changed isCorrelation to isCovariance in alpha to allow us to find alpha from covariance matrices \item Changed isCorrelation to check for symmetry, diagonals all = 1 and values -1 <= x <= 1 \item Added the y option in polychoric to match tetrachoric and the various cor functions. This allows asymmetric correlation matrices. \item Added a number of different correlation options to pca/principal to match those in the fa function. \item Added setCor to the models that can be used for prediction in predict.psych } } \subsection{Bugs Fixed}{ \itemize{ \item fixed tetrachoric to handle y variables as well as x variables. Further modification to correctly handle empty cells. \item Fixed fa.lookup to properly handle negative signs in names of omega output \item Fixed response.frequencies to handle tibbles following a suggestion from Mikko Ronkko. } } } \section{Changes in psych version 1.9.12 (2019-12-15)}{ \subsection{Additions}{ \itemize{ \item Moved various larger data sets to psych tools \item Added summary function for cohen.d \item Added summary function for testRetest \item Added col option to AUC to allow for black and grey AUC curves \item Item improved irt.stats.like an added make.irt.stats to allow importing external item information (Requested by Zara Wright) \item A considerable improvement in speed in pairwiseCount by using crossprod instead of normal matrix multiplication \item Added the ability to find confidence intervals for the r.wg and r.bg in statsBy (requested by Sandrine Muller) \item Added the ability to find confidence intervals from a correlation matrix using the corCi function. \item Added a note to corPlot about the problem with correlations > 1 (pointed out by Ryne Sherman). Then added the option to make all values > 1 the same color. \item Added a new function (scoreWtd) to allow for weighted scoring. Also improved bestScales to find weighted as well as unit weighted scores. \item Added a diagnostic message and error counts in multilevel.reliabilty for the alphaBy function following a query by Randy Lee. \item Added sim.bonds to simulate the Thomson (1916) bonds model of independent factors with cross loadings. \item Finally changed the default for corPlot to be numbers=TRUE. \item Modified setCor and mediate to find the intercepts and standard errors of the intercept. \item Modified scoreWtd to allow for input from lm or setCor \item Modified error.dots and cohen.d to allow for error.dot plots of cohen.d results \item Added a sort option for cohen.d \item Improved bestScales so that it can report scales (both weighted and unitweighted) for multiple values of n.items (from low to high). Suggested by Lorien Elleman and Sarah McDougald. \item Added aslist as an option to factor2cluster \item Added the intercept term in mediate \item Implemented a small change in fa2latex to format h2 as h^2 (requested by Alex Weisss) \item Added the ability to compare models using the anova function for setCor and mediate. \item Added the ability to specify item difficulties and discriminations for scoreIrt (requested by David Condon and Zara Wright) } } \subsection{Bugs Fixed}{ \itemize{ \item Problem with scoreIrt.1pl with keys of length 2 \item Confidence intervals in ICC were incorrectly based upon alpha/2 instead of alpha. (reported by Wei Chen). Fixed. \item added main to plot call in mlr \item added sqrt of communalities in fisher.test in circ.tests (reported by Lisa Barrett and Jiahe Zhang) \item Take the square root of communalities in polar \item Fixed scrub (introduced a bug in 1.9.4 that prevented it from working) \item Fixed bestScales to handle matrix input (detected by Anne Zola) \item Minor tweak to statsBy so it will not find r.bg correlations for two groups (since this is meaningless). \item Corrected the definition of S/N in the writeup for alpha. (Error reported by Carla Leal Kaymalyz) \item Finally fixed mediate to handle 2 or more DVs \item Fixed a bug in factor.residuals to pass Phi to factor.model. (Reported by Asghar Minaei) \item Painfully changed from using class(x)[2] to inherits(x,value) to respond to changes in R 4.0.0 in most functions. \item Added the Phi to the fa object in the bassAckward output to allow for factor output with loadings, correlations, and SS loadings. (In response to a question by Niclas Kuper.) } } } \section{Changes in psych version 1.9.4 (2019-04-11)}{ \subsection{Additions}{ \itemize{ \item Added dictionary capabilities to manhattan \item Added top as an option to dummy code. This will give just the dummy codes for the top most frequencies \item Added a check for keysList and items in scoreIrt.2PL and others for the case where we mistakenly reverse them \item Added the ability to take correlation input for manhattan \item A serious improvement to bestItems so that it actually is useful \item Added lookupFromKeys as a useful helper function \item Added TargetT to do orthogonal target rotations. (It was already there, just not visible.) \item Added faRotate to allow many types of rotations (also already there, just not visible). In response to a suggestion by Alex Weiss. \item Added Target as an option to faCor (so that we can compare target rotations) \item Added correction = .5 to AUC in response to a problem with 0 cell entries reported by Jeff Crukley. \item Added the 'mollycoddle' feature to scoreItems, scoreFast, scoreVeryFast and setCor to check for bad input. (Suggested by Tim Bates). \item Added guttman.lambda2 to splitHalf (requested by Muirne Paap) \item Added Pinv to find pseudo inverses to allow calculations in schmid (however, this is not actually necessary). (Adapted from ginv from MASS.) \item Add scores to the output of bestScales in units of criteria (actually done in the predict function) (requested by Lorien Elleman) \item Added the ability to use "schmid" as a rotation option in bassAckward (requested by Joanna Hartung ) \item Added v.lab option in error.bars.by (as requested by Maike Luhmann ) \item Modified scrub so that it will process categorical data. \item Added a bestScales prediction to the predict.psych function following suggestions by Sarah McDougald. \item Cleaned up bestScales so the code is a bit cleaner. Improved the documentation for bestScales and predict.psych to show how prediction works with bestScales. \item Added the vars option to biplot so that we can draw arrows to the data or to the variables. } } \subsection{Bugs Fixed}{ \itemize{ \item checked the BDI scores in affect -- they seem strange, but are in fact correct. They are mean scores. \item check the eigen values in iclust - something is wrong \item Fixed problem in Hedges g (reported by Chuck Powell) \item Fixed a problem in isCorrelation for a square, symmetric matrix which is not a correlation matrix. However, we needed to fix this again by using all.equal. \item Fixed a problem with mlr/multilevel reliability for the lme decomposition. Variances were indexed by location rather than by name. This led to a problem for certain cases. (Reported by Chelsea Dickens) \item Fixed manhattan so that it will plot -log p values that are inf (by replacing with the largest non-infinite -log p \item Fixed inconvenience (not really a bug, just poor documentation) in alpha with complete keys specified by location instead of name (reported by Richard Pond) \item Fixed a bug in corr.p where it was not labeling the confidence intervals correctly. (Reported by Jeffrey Lees.) \item Fixed bassAckwards for problem with orthogonal rotations (reported by Joanna Hartung)) \item Fixed bug in RMSEA calculation in factor.stats introduced in 2017 (reported by Michael Lee Crowe ) \item Fixed bug in statsBy for reporting correlations with multigroups. It was not labelling the results correctly. (reported by Eva Skimma) \item There seems to be a problem with fa.parallel when using multi-cores on Microsoft Open R. See the note for fa.parallel for a fix. ) } } } \section{Changes in psych version 1.8.12 (2018-12-31)}{ \subsection{Additions}{ \itemize{ \item Added cohen.d.expected to find the expected value of the M.dist. It is sensitive to sample size and thus has a non-zero expected value. \item Added dictionary option to cohen.d to include a list of items used. \item Added the ability to do MIMIC diagrams from lavaan output in lavaan.diagram \item Added the ability to specify the size in ellipses drawn by scatter.hist (requested by Jacques-Daniel Basic ) \item Modified dfOrder to allow a global direction (ascending, descending) option, and also to handle matrices \item Added the ability to use stats in error.dots (comparable to error.bars) \item Continued to improve bestScales \item Minor changes to lookup and selectFromKeylist to make compatible with bestScale issues \item Added the log.p option to bestScale \item Invisibly return the correlation matrix (if found) from corPlot \item Added matSort as an alias to mat.sort \item Changed the documentation in cohen.kappa following a suggestion by Kelly Reeve \item Added the fparse (formula parse) function to the namespace to make it public \item Modified setCor and mediate to handle quadratic terms if specified in the formula input. \item Added manhattan to do Manhattan plots of correlations versus a criterion. \item Added the ability to impute data in alpha \item Minor changes in fa.organize to allow for matrix input \item Minor improvements to esem and esem.diagram to allow for lavaan model output. \item Added bassAckward to do the bass-ackward approach of Goldberg (see also Waller). This returns the correlations of factors across solutions and draws a diagram. \item Added faCor to find the correlation and congruences between two factor solutions of the same data. \item Added extension.diagram to make cleaner fa.extension diagrams. Can be called from diagram() \item Improved the documentation for fa.extension to explain more applications. \item Added EPI personality scales, as well as film and drug condition to msqR. Re-arranged the order of variables in msqR to be more user friendly. \item Changed the message in fa.stats about bad solutions from a message to a warning (which can be suppressed using suppressWarnings). } } \subsection{Bugs Fixed}{ \itemize{ \item Fixed omega.diagram when drawing from a loadings matrix \item Fix ylim parameter in plot.irt so that it actually works. \item Fixed setCor so that it can handle single x variable with raw data. \item Fixed corr.test so that it actually does use min.length \item Fixed alpha so that it properly warns when it needs to reverse keys (incorrectly omitted this feature in the 1.8.8 release. Reported by Mohammad Latifi) \item More importantly, fixed alpha so that it actually follows the help file. That is to say, it now will score short subscales appropriately. \item Fixed bug in setCor when doing partial rs. \item Fixed a related problem in mediate/moderate/setCor in terms of df when doing partial r. \item Fixed a problem in mediate when doing just regressions with no mediation and just the correlation matrix. \item Fixed biplot.psych so that it keeps the correct frame of reference so that abline(h=0) will work. (Reported by Fabio Daniel Trinco.) \item Minor tweak to setCor to properly print unweighted multiple R } } } \section{Changes in psych version 1.8.10 (2018-10-30)}{ \subsection{Additions}{ \itemize{ \item Changed the link from icar-project.com to icar-project.org to provide more stability \item Improved the graphics of structure.diagram (tweaked the e.size parameter). } } \subsection{Bugs Fixed}{ \itemize{ \item When the fix for fa to handle covariances correctly was made in 1.8.9,(September) another error was introduced that led to improper estimates for small problems. This has been corrected. \item Fixed problems with generating code for lavaan and sem for correlated models in structure.diagram \item fixed a bug in dia.arrow that was causing problem lavaan.diagram problem} } } \section{Changes in psych version 1.8.9 (2018-10-04)}{ \subsection{Additions}{ \itemize{ \item Added minlength=5 to corr.test (requested by Felipe Dalla Lana) \item Added robust version of cohen.d (d.robust)(See algina, 2015) \item Modified keys2List so that it does not reorder the variables in the keys. This is useful when we use scoreItems and want to use the corrected.cors for estimating the correlation matrix, or other uses. \item Improved bestScales so it specifically does k-fold cross validation and bootstrap aggregation (bagging). Improved the documentation a bit. That is, for bestScales consider the implications of .623 sampling due to bootstrap for the cross validation sample. Why do 90-10 when we are already doing 63.2/36.8? \item Added a summary for bestScales \item Added a few more descriptives to alpha (min.r and max r) \item Added Niels Waller's direct Schmid Leiman solution (see Psychometrika 2017) as a rotation option and the function directSl. \item Using Niels function, also added omegaDirect to calculate omega this way. \item Added Procrustes rotation (from Niels Waller) \item Added a few more options to fa.lookup \item Fixed omega.diagram so that variables are sorted by F* factors. \item Cleaned up omega.diagram to allow for omega.extension output and omegaDirect output. \item Added intercept and confidence intervals for betas in setCor (requested by Franz Strich) \item Added an nfactors option to fa.parallel (requested by Florian Scharf). \item Added the add option to error.dots (allows for overplotting with dotcharts) \item Added the cluster option (now the default) in fa.organize \item Added the ability to plot single data vectors in error.dots \item Added grouping variable to dummy.code. \item Added scoreVeryFast to find sum scores for large data sets. \item Parallelized scoreFast and scoreVeryFast for faster processing. \item Added pairwiseImpute to impute correlation values for structured data \item Added pairwiseReport to identify particular item pairs with low counts \item Changed the name of count.pairwise to pairwiseCount to be consistent with family of pairwise functions (count, describe, impute, report) \item Clarified the parameter settings in r.test (following a request by Cassandra Brandes) \item Added the order variable parameter for error.dots to allow forcing a sorted order. \item Modified sim.structural/sim.structure so that when simulating items, it more closely matches the factor model requested. It was generating the data and then simulating the items with one more pass through randomness, leading to structures that did not meet the requested structure. \item Modified error.bars.by to take formula input and to draw two grouping variables appropriately \item Added m2t to find t.test from 2 means and 2 standard deviations \item Added cex and l.cex to mediate.diagram, moderate.diagram, and setCor diagram following a suggestion by Shu Fai CHEUNG \item Modified multi.hist to give more control over margins and titles \item Improved scoreOverlap to allow for raw data input (but, of course, this will still not result in scores). \item Modified bestScales to name the best.keys object. Improved the documentation to emphasize the general utility of the function. } } \subsection{Bugs Fixed}{ \itemize{ \item Fixed documentation in AUC which did not have specificity correct \item Fixed Draw tetra to correctly find (phi) and to report the 2 x 2 matrix \item Fixed alpha so that it names the keys when it returns them. (not a bug, just a pain that it did not). \item Fixed bug in mediate diagram for 2 DVs, one IV \item Fixed setCor.diagram to plot residuals for the correlations for the DVs \item Added weights option in fa call to tetrachoric and polychoric (thanks to Suzanne Dufault for the report) \item Fixed the bug in print.psych.mediate that was improperly reporting the indirect effect for the second IV. \item Fixed a problem in fa where with the covar=TRUE option and covariances > 1, the results were incorrect (reported by Phillip Doebler). \item Corrected code in cosinor to handle radians correctly (reported by Obioha Durunna) \item Fixed fa.parallel so that the default for quant is .95 and is actually used. \item Fixed principal so that the oblique target rotations are labelled as transformations (Ti instead of Ri) (Reported by Wu Hao) \item Fixed a problem in scoreOverlap and scoreItems for the case of a single, negatively keyed, variable for a scale. Problem due to an infelicity of the diag function. \item Fixed bug in splitHalf (not correctly reporting the mean split half. Reported by Wes Bonifay.) The problem was we were not counting the first split. \item Fixed the dates of various parts of the news file so that they read correctly, and thus the Version > "1.8.4" trick works \item Fixed the sign of t2r for negative ts (detected by Ashley Brown) } } } \section{Changes in psych version 1.8.4 (2018-04-30)}{ \subsection{Additions}{ \itemize{ \item When using multiple raters in cohen.kappa, report the average (Light's kappa) as well as the individual pairwise kappas. \item Added fromTo to allow choosing variable sequences by names of first and last \item Added cs (taken from Hmisc:::CS function) to convert non-quoted strings into quoted vectors. \item added acs to convert input into quoted strings. \item Added median r to alpha, scoreItems, scoreOverlap, splitHalf (this serves as a signal for poor scales if the median.r differs very much from the av.r) \item Improved bestScales to allow easier bagging of results. \item Improved the documentation of bestScales to emphasize the bagging nature of the function. Separated the documentation from fa.lookup and lookup. } } \subsection{Bugs Fixed}{ \itemize{ \item Fixed a problem with describe when describing a single column data frame. Reported by Vilmantas Gėgžna. The problem is with the way sd handles data.frames. \item Fixed mediate so it will do regressions without mediation ala setCor. \item Fixed a bug in corr.test for the case of single x or y variables (reported by Alex Weiss) \item Fixed a bug in alpha where it was not working from a correlation matrix with n.obs specified. (Reported by Nicholas Stefaniak). \item Fixed some problems in bestScales reported by Elizabeth Knowlton. } } } \section{Changes in psych version 1.8.3 (2018-03-21)}{ \subsection{Additions}{ \itemize{ \item Minor changes in documentation (e.g. in alpha, violinBy, corr.test, ...) \item digits option to print(x, all=TRUE) in print.psych \item Modified violinBy to treat character variables as numeric (with notification) and to adjust the minimum and maximum in the case of groups. (Following some suggestions by Christie Nothelfer.) \item Added the omit option to describe to just describe numeric data (another suggestion by Christie Nothelfe) \item Modified p values in corr.test to allow for very small p by using the log.p option (suggested by Nicholas Clark) \item Modified read.file and write.file (etc) to allow file= as well as f=. This is to make it more obviously compatible with save.file(). \item Dropped all sex > 2 values in spi. \item Improved the dictionary for spi to include the demographic codes. \item Added the density option to error.bars (suggested by Anne Zola). \item Added the na.rm=TRUE option to dummy.code (suggested by Elizabeth Knowlton) \item Added the ability to have criteria as a separate object in bestScales \item Added formula option to setCor \item Improved the input to setCor and mediate to allow for formula input. Also allowed interaction terms in setCor for parallelism with mediate. \item Improved bestScales so that it summarizes the results of iterations. Fixed a problem with the overlap option so that it correctly chooses the number of items. \item Improved dfOrder so that it sorts dataframes more readily. \item Greatly improved setCor and mediate to allow for multiple interaction terms and to include higher level interactions. Both functions now return the transformed data when doing interaction terms. Improved the print.psych function for setCor. \item Added a HowTo for mediation and moderation, cleaned up the other HowTos to allow them to cross reference each other. \item Added the Garcia and Tal_Or data sets for demonstrations of moderation and mediation. \item Added the ability to partial out variables in mediate. \item Switched to using the log option in pnorm for probabilities in pt and pf \item parallelize iterations when bootstrapping fa (did this some time earlier, but had not taken if off the to do list) \item Added alpha option to corPlot (to allow semi-transparent plots) \item Dropped the density =50 option and changed to alpha=1 in violinBy. This leads to much faster plotting. \item Added the densityBy function to draw multiple histogram/density plots to show the overlap between two or more distributions. Can also just draw the density for one distribution. \item Modified the call for bi.bars to be consistent with densityBy and violinby calls. \item Minor improvement in vignette documentation \item Added the AUC (area under the Curve) function. This basically adds signal detetection capability as an option for dichotmous data (and reports such measures as sensitivity and specificity as well as d'). \item Added alpha.ci to calculate confidence intervals for alpha based upon Feldt (1987) \item Added confidence intervals to splitHalf function. \item Added the df in mediate \item Modified the call sequence to matReg to make it more transparent in mediatethe individual ab paths in the mediate function (requested by Heather Urry) \item Added the describeFast function for quick counts of large data sets. \item Added the filesList function to display all the files in a directory (using file.choose to find the directory). \item Added the fileCreate function to do what file.choose() should do for RStudio \item Added the filetype option to read.file and the ability to write without a suffix to write.file \item Added filesInfo for information about the files in a directory \item Completely rewrote the dfOrder function after finally understanding the help menu for order. \item Added testRetest to do test-retest reliability and a number of related statistics. \item Added checks for non-numeric data and data with no variances to mixedCor. \item Added a check in mlr so that it will only find within subject alpha if n.times > 2 (This was throwing a warning if using mlr to check testRetest results.) \item Serious rewriting of mixedCor with the hope of making it faster (and more robust to cases where we have unequal number of categories in polytomous data). Perhaps solved the problem of poly.di blowing up if the polytomous have a different number of categories. \item Fixed the degrees of freedom in setCor and mediate to reflect the actual numbers of subjects in the correlations, not the number of cases in the file. \item Modified ICC to use lmer from lme4 to handle missing data and to report variance components. Set the default to use lmer. This is particularly useful if doing large files. \item Added the theta parameter to the sim.irt family of functions to allow for specification of the theta across simulations. (Requested by Yuming Liu.) \item Added DAT as a possible data file type to read.file. \item Cleaned up the diagram.Rd file demonstrations to look nicer. \item Added a min.length parameter to corPlot to allow for abbreviations of row and column names. \item Added the sai and tai data sets (state anxiety and trait anxiety measures from the PMC lab) for demonstrations of test-retest reliability, validity, etc. \item Changed all the http:// references to https:// to get around problems with failure to read http:// files. } } \subsection{Bugs Fixed}{ \itemize{ \item Added ci <- sef <- ci.adj <- NULL in corr.test to fix problem reported by Nicholas Clark 9/13/17 \item Fixed an inconsistency in mssd between the single vector case and the matrix/data.frame case. Was dividing by n-lag -1 instead of n- lag. \item Clarified documentation for mssd (problem raised by Dennis Mongin) \item Discovered that write.file(new=TRUE) works only in R.app and not in RStudio. This seems to be because core-R ignores the new=TRUE option. \item Correct was not being passed correctly in fa for doing polychoric or tetrachoric correlations (detected by Jason French, 10/10/17) \item Fixed improper error message bug in cohen.kappa reported by Johannes Bracher \item Fixed spi.dictionary so that it labels variables correctly. \item Fixed a print error in mediate for the a path (probabilities were displaying incorrectly.) \item Made the digits option actually work in pairs.panels (reported by Federico Filipponi) \item Fixed a bug in fa for some problems (e.g. fa(bfi[1:5], covar=TRUE) \item Fixed a problem with diagram.curve and another in diagram.curved.arrow \item Fixed a problem with plot.fa.parallel for the case of plotting just one line. (reported by Daniel Morillo). \item Fixed the lavaan and sem models for omegaSem to handle reversed items properly (this involves gsub for - signs in names). \item Fixed the ab boot values in mediate when we partial out variables (reported by Dan Molden) \item Corrected the BDI scores in the affect data set. They had been incorrectly the EA1 scores. \item Modified the help file for read.file to explain more clearly how to read .rda files. \item Modified polychoric so that when it tests for number of categories, it is not limited by 1-5 or so, and can handle 0,20,40,80, 100 if these are few enough categories. \item Changed the (1-alpha) "\% confidence" to (1-alpha)*100 "\% confidence " in error.bars.by (requested by Victor González Fernández) \item Fixed the residuals reported in fa.extend (not giving correct values for uncorrelated factors --reported by Deon DuBruin) (and modified the print statement \item Changed the degrees of freedom in paired.r to be n-3 to match r.test (and Steiger 1980) \item Fixed setCor so that the ruw values are correct in the case of negative correlations. (reported by David Condon). \item Added left and right brackets to variable names in df2latex to allow stars to print correctly in LaTex \item Corrected the definition of the objective function in principal (error pointed out by Andre Beauducei) \item Fixed cor2latex (and df2latex) so that large negative correlations can be bold faced as well as large positive correlations (pointed out by Anne Zola). \item Fixed the label option in error.dots so that it actually works. } } } \section{Changes in psych version 1.7.8 (2017-08-31)}{ \subsection{Additions}{ \itemize{ \item Completely rewrote partial.r for readability and also to add the ability to find the anti-image (that is to say, to partial all the variables from each other in correlation matrix.) \item Added some options to autoR. \item Added the gr option (for color gradients) to corPlot following a suggestion from Anne Zola and Lorien Elleman \item Added fa.random to start doing random effects factor analysis. \item Added cohen.d to find cohen.d statistic for two groups on a number of variables. Also reports the Mahalanobis distance. \item Added Mahalanobis distance for groups (see del Giudice, MBR, 2017) \item Added d.ci (cohen.d.ci) to find confidence intervals for effect sizes. \item Added d2t, t2d to do basic conversions of effect size. \item Rewrote bestItems and bestScales to allow raw data or correlation matrix input and to be much faster. \item Added the pc option to fa.diagram to allow the arrows to come from the observed variables if the data are a principal components solution. (suggested by Michael Wood). \item Added selectFromKeys to allow selecting items based upon a keys.list \item Added fa.sapa to allow iterated factor solutions across random subsets of subjects. Basically just a tool to examine the stability of sapa based solutions. \item Completely rewrote pairs.panels to a) add confidence regions for regressions and for loess fits. Changed the smooth function to be loess rather than lowess. Adjusted the histograms so that values line up with the labels. Changes suggested by Julian Martin. \item Added alpha factor analysis as an option to the fa function. \item Converted package to be byte-compiled. This seems to result in a 7 - 10 \% improvement for e.g., factoring a 1000 x 100 categorical data matrix (mean of 61 elapsed vs. 65 secs). \item added sem.diagram and sem.graph which call structure.diagram and structure graph with sem output. Still having problems sporadically with RGraphviz. \item Added in cross validation as an option to best.scales/bestItems. \item Modified alpha so that it can just select a small subset of items to score \item Added a comment to the r.test function about precision of p values (suggested by Julia Rohrer). \item Add an additional object to corr.test and corr.p to give Bonferonni and Holm (default) adjusted cis (requested by Lorien Elleman) \item Added the name for corPlotUpperLowerCi to be the camel case equivalent of cor.plot.UpperLowerCi \item Changed the se.bars=TRUE to se.bars=FALSE in fa.parallel to make the decision rule more transparent \item Modified fa so that it will not die if nfactors = nvar \item Changed messages to warnings in polychoric. This allows fa.parallel to run in parallel (and therefore much faster) without crashing. (Based upon a problem reported by Guido Biele). \item Added the diag option to residuals and resid. \item Added the blant data set to show factor analytic problems. } } \subsection{Bugs Fixed}{ \itemize{ \item Fixed problem in cohen.kappa to correctly find the variances of weighted kappa (reported by Lisa Avery). \item Fixed mssd to correctly divide by number of cases-lag instead of n.obs - l - lag (reported by Charlotte Vrijen) \item Fixed problem with alpha for two item scales (I was not correctly reporting the alpha if one item was dropped.) Reported by Daniel Zingaro. \item Patched cohen.kappa so that it warns instead of breaking if given identical ratings for everyone (reported by Caroline Wehner). \item Added the factor names for old.min(res) solutions. \item Fixed (finally?) a bug in mixedCor (coming from polydi) that was incorrectly reporting half of the correlations as 1.0. This was causing improper solutions in any data set that had more than 1 dichotomous variable and more than 1 polytomous variable. \item Fixed scoreIRT.2pl so that it does not choke on 1 items scales \item Modified fa so that it will report the factor score correlations for the chosen scoring method as well as the regression based scores. Although factor.scores was reporting the correct R2 values, fa was always reporting the regression based R2 values. Also deprecated the oblique.score option. (Based upon trying to answer a question from Jeannette Samuelsen.) } } } \section{Changes in psych version 1.7.5 (2017-05-01)}{ \subsection{Additions}{ \itemize{ \item Added read.xport to the read.file so we can read SAS export files. Also added the widths parameter so that read.file can read a fixed width file. \item Added select option to scoreItems and scoreOverlap to just score the items selected (to enhance speed for big sets of items and short scales). \item Added a private function (selectFromKeyslist) to handle numeric and character keys.lists when doing selection. \item Added irt.se to find standard errors based upon the test information \item Added show = FALSE to setCor to allow the option of showing the unweighted matrix correlation (Had removed this in prior release, but decided for teaching purposes, is is convenient.) \item Added the ability to define the separator when reading csv files in read.file \item Added breaks=21 to multi.hist to allow for better plotting control \item Added the ability to plot asymmetric correlation matrices in corPlot (requested by Anne Zola) \item Added the correction option to fa.parallel when doing tetrachoric or polychoric correlations. It had been set to .5 without the ability to set it to 0. (Inspired by a question from Ismail Cukadar). \item Added stars as an option to pairs.panels and corPlot. This is for those people who insist on showing 'significance' values. Also added scale as an option when showing the numbers (to show significance by number size.) \item Added quickView to complement headTail and topBottom. Added from and to parameters to all three functions. \item Added top=4 and bottom=4 to replace hlength and tlength in headTail and topBottom. Makes more obvious code. \item Added Vaccounted for fa and pca (as requested by various stackoverflow users). \item Dropped the message about matrices not being square in cortest. (Just an irritant when doing R CMD check and testing the intro.rnw file)#Adapted from the help for pairs #modified December 15, 2011 to add the rug option #further modified March 30, 2012 to add the method of correlation option (suggested by Carsten Dormann). #Fixed a bug in show.points on March 18, 2017 (reported by Matthew Labrum) #by moving all the little functions to be outside the main function, this allows method and rug to be passed to these lower order functions. #this should allow for somewhat cleaner code for the other functions #modified March 15, 2015 to add the ability to control the size of the correlation separately from the cex variable in the points #also added the ability to set the number of breaks in the histograms #Also completely reorganized the main function to be much cleaner #would like to add the smoothScatter function as an option \item Modified plot.parallel to go from ymin rather than 0. Suggested by Hao Wu. \item Added confidence intervals for the ICC1 and ICC2 to the output for statsBy (requested by Lorien Elleman) \item Added mixedCor as an improvement to mixed.cor. Somewhat cleaner interface. } } \subsection{Bugs Fixed}{ \itemize{ \item Modified the minres solution in fa following very helpful suggestions from Hao Wu. Basically, the first derivative wrong and the solution was slightly incorrect. This has now been fixed for minres. fm="old.min" keeps the old solution. The fm="uls" uses empirical first derivatives works to give the unweighted least squares solution. This is discussed in the fa help page as well. \item Fixed RMSEA error (as reported by Hao Wu). In version 1.6.12 I had changed the RMSEA formula from the Tucker formula to the one used in sem and now have reverted to the Tucker formula (as described in the help page.) \item Fixed cor.plot.upperLowerCi so that it works again and also reports replications \item patch to mixed.cor for figuring out what kind of data we have. Basically, set all minima to 1 so that polydi won't croak \item moderate.diagram was drawing incorrect X paths. \item plot.irt was not labelling factor numbers \item t2r was not taking the sqrt and thus was giving much to small rs. fixed. \item In describeBy, changed nrow to NROW to correct error thrown by the affect data set for the development versin of linux (although that seems to have been a temporary error.) \item Modified isCorrelation yet again to treat the case of symmetric correlation matrices stored as data frames. (Used in some class examples!) } } } \section{Changes in psych version 1.7.3.21 (2017-03-21)}{ \subsection{Additions}{ \itemize{ \item Added sim.multi to simulate and draw within subject data. \item Minor modifications to corPlot for labelling (and added a comment to the help file to show how to do multiple plots to answer a query from Yuji Shimohira Calvo) \item Modified cohen.kappa so that the number of levels can be specified if there are some levels that are missing completely (requested by Amy Finnegan). \item Added multilevel.reliability (mlr) to find various generalizability coefficients for three way (subjects x time x items) data following chapters by Pat Shrout and Sean Lane. \item Added a helper function (mlArrange) to convert semi-wide data into long data. Useful for multilevel analyses of three way data sets. \item Added mlPlot to plot multilevel (id x time * item) data. \item Added a plot option to fa.parallel to allow suppression of plotting (requested by David Copndon) \item Added autoR to find autocorrelations based upon mssd. This is useful for multiple time points. \item Added a trivial helper function r2c (and cor2cov) to convert a correlation matrix to a covariance matrix. \item Added VIF (Variance Inflation Factor) which is just 1/(1-smc) to setCor. \item Added a note to alpha about the dplyr problem and added a fix to the problem (as reported by Adam Liter). \item Added a discussion to the fa.rd documentation discussing the problem of estimating the minimum residual. (Inspired by correspondence with Hao Wu and Mikko Ronkko). \item Added a new vignette (intro) and modified the overview vignette. Meant to make the introduction and overvies easier to read. \item Added a new function: unidim to estimate the unidimensionality of a set of items. \item Inspired by a question from Jared Smith, added several new features to scatter.hist, as well as removed a few bugs. \item While patching pairs.panels (see bugs fixed), add smoother option to all the other options. \item Added cor2cov (and then temporarily removed because of conflict with lavaan.shiny call to lavaan::cor2cov). } } \subsection{Bugs Fixed}{ \itemize{ \item ScoreIrt: corrected cut so that it passes through to lower level functions (reported by David Condon) \item rownames for corr.p$ci were incorrectly labeled (reported by Lorien Elleman) \item Fixed a bug in finding RMSEA and the confidence intervals thereof. (Reported by Hao Wu). Possibly the error was introduced in 1.6.12 \item Fixed plotIrt so that it correctly rescales ylim for each factor rather than just the first factor. \item The lag parameter in mssd was not being used. Fixed. \item The handling of missing data by groups in statsBy is fixed for finding ICC1. This was producing seriously incorrect ICC1 and ICC2s with a great deal of missing data. That is for SAPA data, when grouping by ZCTA, many items were missing. (Reported by Lorien Elleman). \item Fixed a bug in scoreItems and scoreOverlap when keys are already a data.frame rather than a matrix. (Reported by Jeromy Anglim). \item Cleaned up various problems in mediate and print.psych.mediate so more complicate problems will work. \item Modified isCorrelation to test for symmetric matices but dropping colnames and rownames. (This led to a problem with the neo data set. Reported by Hao Wu.) \item Changed the neo data set from data.frame to matrix. \item Fixed omegaFromSem and print.psych.omega to handle case of 1 factor sem model (which would normally be thought of as inappropriate, but some people wanted.) \item Fixed a problem with printing describeBy output. (Reported by David Uttall.) \item Fixed a problem with esem for the case of having more variables in the correlation matrix than being analyzed in the esem statement. \item Fixed a bug in pairs.panels where it was ignoring the show.points option in several option cases. (Reported by Matthew Labrum). } } } \section{Changes in psych version 1.6.12 (2016-12-31)}{ \subsection{Additions}{ \itemize{ \item allow scaling of circles in error.circles (suggested by Josh Wilt) \item Added char2numeric to solve a problem of categorical data from a questionnaire. \item Added a helper function dfOrder to sort data.frames on multiple columns. \item Added interbattery for interbattery factor analysis. \item Changed the print line for MLE chi square to say model based chi square. \item Added Promax as a possible rotation in kaiser, and added the m = option to it as well. (In response to some correspondence with Stefan Dombroski). \item Clarified the KMO and cortest.bartlett help pages. \item Started to get rid of all of the deprecated functions. (or at least move to them to unique help files). \item Added a trivial but useful function isCorrelation to improve the tests in alpha, scoreItems, etc. \item Serious reworking of omegaSem and omegaFromSem to allow the use of lavaan or sem to do cfa as part of omega. Also, it is now possible to do omega directly from a sem/lavaan run. \item Added lavaan as a source of omegaSem and omegaFromSem. Also, for omegaFromSem, added some of the normal omegasubscale output (suggested by André Kretzschmar). \item fixed alpha, principal, fa, kmo and omega to check if the input is a symmetric correlation matrix or just a strange case of raw data (following a suggestion by Adam Liter). \item Modified cohen.kappa to truncate confidence limits to +/- 1 and added a discussion to the help page describing how to do a boot strap of cohen.kappa to examine the distribution of cohen.kappa (suggested by Andreas Spitzmueller). \item allowed scoreItems to handle variables that are factors. Added a warning when scoring total scores with impute="none" when missing data are found. \item Added a parameter to densityBy (violinBy0 to allow for scaling the widths by sample size. \item Added a helper function, dfOrder to sort (order) a data.frame by multiple columns \item Completely revised scoreIrt (and the associated functions) to get around problems with all 0 or all 1s and to make the function faster, and perhaps more accurate. \item Changed the sort option in irt.fa to be FALSE (makes scoring easier). \item Fixed VSS so that it reports not the biggest, but also the first where the sign changes. \item Allow imputation in factor scores (e.g. median or mean imputation for items) (requested by Stephan Daus). Also report the number of missing observations per subject when doing factor scores. } } \subsection{Bugs Fixed}{ \itemize{ \item Fixed the RMSEA in fa.stats. Although the confidence intervals were correct, the RMSEA value was slightly inflated. Reported chi squares were correct, just the RMSEA was slightly off. \item omega and schmid incorrectly estimated total variance in case of being given a factor loading matrix and correlation matrix as input instead of the more normal case of a raw data set or a correlation matrix. \item Bug fixed in scoreIrt where the values for the scores for people who gave all of the highest possible scores was incorrect (reported by Roland Leeuwen ). In addition, the scoreIrt.poly was not properly doing normal scoring but was in fact just doing logistic scoring. Fixed. This bug was affecting those people with max or min responses, and thus was particularly a problem for short scales. \item Fixed a bug in scoreItems for cases where some variables (not scored) are categorical (Reported by Ronald Fischer). \item Fixed bug in fa.parallel where it is not drawing both resampled and simulated lines. (reported by Alexander Weiss). This was intentionally added in about March to make a cleaner diagram, but now I am discussing this in the help file. \item Allow specification of the direction of items in the scoreIrt.2pl to match the keys.list (previously it would match the factor analysis which could be backwards for some scales with many reversed items). \item Did not need to fix bug in wtd.table in polychoric by adding in minx and maxx, etc. (reported by Jeovani Schmitt) because this can be handled by settting global=FALSE. \item Minor fix in the default labels for the fa.parallel function (suggested by Meik Michalke) } } } \section{Changes in psych version 1.6.9 (2016-09-18)}{ \subsection{Additions}{ \itemize{ \item Made it clearer that score.irt with the keys option is basically a Rasch model.That is, tau and equal loadings are specified by keys. \item Serious rewriting of scoreIrt (both for dichtonous and polytomous items) to make it able to use multiple cores (speeding up at least 8 times) and making the code more efficient when scoring multiple scales. \item Added scoreIrt.1pl and scoreIrt.2pl to more easily find Rasch like and 2 parameter solutions. \item Added scoreFast to (very) rapidly find mean or total scores with or without imputation (ala scoreItems) but not to bother finding all the complicated statistics which take a lot longer to find. In addition, it just bothers to look at the items that to be scored, rather than all the items in the items set. \item Added a trivial function (cor2) to correlate two or more sets of data (x and y), round to 2 decimals, save and print the results. Truly trivial, but a time saver when comparing solutions. \item Added more functionality to dotchart and created error.dots. \item Added r2t,t2r,chi2r,r2chi as trivial conversion functions \item Added read.file, write.file, read.file.csv, write.file.csv as helper functions for input/output \item Added the ability to do Minimum Rank Factor Analysis (following a request by Michael Paul Grosz) \item Added an option to print.psych to allow significant digits to be controlled for describe (requested by Jeremy Biesanz) \item Added an exploratory structural equation function (esem) to do exploratory modeling, and esem.diagram to show it. \item Added keys2list which converts a scoring key matrix to a list form. \item Added the ability to just pass a keys.list to scoreItems, scoreOverlap, scoreIrt.1pl. This clearly simplifies the call. \item Added another helper function: keys.lookup that takes a keys.list and a dictionary and displays the content of the items to be scored. \item Did to cor2latex what we did in df2latex (i.e., add cut and big options) \item Fixed vss to do any kind of rotation (that is, just pass the rotation to fa). \item Modified error.crosses to allow specifying text, point, and error arrows separately (suggested and with help from Arnaud Defaye) \item Minor change to tr to allow it consider na.rm=TRUE, and for cohen.kappa to handle cases of items with no variance. } } \subsection{Bugs Fixed}{ \itemize{ \item Fixed subtle problem when handling missing data for score.irt for raw scores. Prior versions (< 1.6.7) would report incorrect total scores for cases with missing items that were to be scored negative. \item Corrected the reference to Revelle, Wilt and Rosenthal in the bfi data set. \item Fixed a subtle problem in print.psych.iclust that was causing errors in the development version of R for PCs. \item Added mu backinto sim.hierarchical (thanks for Alan Robinson for noticing it was gone). \item fixed bug in cor.wt for the problem of a vector of weights rather than a matrix of weights (Reported by Emil Kirkegaard) } } } \section{Changes in psych version 1.6.6 (2016-06-20)}{ \subsection{Additions}{ \itemize{ \item Added an iterative option to cosinor so that it can do phase adjustment. \item Added error.bars.tab to do plots of tabular data with error bars \item Added a sort option to irt.fa so that items are sorted by factor loading. \item Added pca as an alias to principal \item Added the cut and big option to df2latex \item Added a few cross references to make it easier to find functions. \item Modified describe to take character vectors and matrices \item Added labels to factor.plot for more than 2 factors. Not as hard as I thought. Added a show.names options to plot the labels without overwriting a point. \item Modified corPlot to find correlations if input is a non square matrix or data.frame (but not an object from fa or omega). \item Modified describe (and therefore, describeBy) so that it can take more options (including interquartile range) and quantiles (requested by Gilbert Gregory). \item Added a weight option to fa to weight subjects by a weighting vector (requested by Mike Hammer). \item Added anova.psych to allow for tests for chisquare differences between factor solutions. \item Serious modifications to biplot.psych, fa.plot, and cluster.plot to allow names (labels) in the multi-panel (more than 2 factor) option. Also added the choose option to plot just one selected pair of factors from a larger set. This is useful if showing a 3 or 4 factor biplot and then "blowing up" one of the pairs. \item Serious analysis of the use of global and correct for examples of small data sets provided by Gabriele Cantaluppi. Some resulting improvement in documentation and options. } } \subsection{Bugs Fixed}{ \itemize{ \item Changed principal (and fa) so that when it calls GPA::rotation(Varimax) it passes options (such as eps=1e-7) to get more accuracy. (Reported by Gottfried Helms) \item Fixed error.bars.by to handle the case of a single variable \item Fixed plot.irt.poly so that it communicates correctly with irt2latex for multiple factor case \item Fixed irt2latex so that it handles results from plotting irt as well as directly from irt.fa \item Fixed polychoric so that it returns meaningful tau values in the presence of missing categories (reported by Gabriele Cantaluppi). polychoric was returning incorrect values of correlations and tau for the case of item responses differing in the number of alternatives, or with different minima. Particularly a problem with very small sample sizes. Also generally improved polychoric. } } } \section{Changes in psych version 1.6.4 (2016-04-20)}{ \subsection{Additions}{ \itemize{ \item To make the distinction between Principal Components and Rotated Components and Transformed Components, I have labeled them as PCi, RCi and TCi. I said I did that back in 2013 but I did not seem to have done it! \item Modified principal so that it will now handle the covar option correctly when finding principal scores. \item Modified bi.bars so that both the left and right panel are positive numbers (requested by Lorien Elleman). Also added the ability to label the columns/rows. \item Modified omega.diagram so that labels can be applied to the factors. \item Added the use option to splitHalf. \item Minor tweak to output of fa.extend for the case of extending to one variable. \item Modified fa.congruence to consider the case of missing loadings (requested by Emil Ole William Kirkegaard) \item Modified documentation to cortest.bartlett to reflect test residual matrices from fa. \item Modified alpha to all the specification of n.obs if using correlation matrix. This allows for finding the s.e. of alpha even from a correlation matrix. \item Modified cor.wt to handle the problem of missing data \item Modified pairs.panels to allow plotting the raw data points but the weighted correlations \item Add the ability to do two level hierarchical factor analysis (i.e, taking out n at level 1, m at level 2. Currently we do this with m=1 for omega) \item Added a warnings option to the alpha function as requested (repeatedly) by Markus Graf. \item Added two experimental measures of unidimensionality/goodness of fit to alpha. \item Modified mediate so that it handles moderation as well \item Added the diag and upper option to corPlot and cor.plot to allow for blanking out the diagonal or upper off diagonal (Suggested by David Condon). \item Added a number of options to plot.residuals to give more helpful displays \item Added (improved) error.bars for taking in data from an external source. \item Completely rewrote the mediate function to be more general and to have better graphics. Added the ability to have multiple y variables. } } \subsection{Bugs Fixed}{ \itemize{ \item Modified principal so that it will now handle the covar option correctly when finding principal scores. \item Corrected a bug in alpha (reported by Tamaki Hattori) to correctly find the s.e. of alpha. I was slightly overestimating the correct values. \item Fixed a bug in setCor that was given incorrect SE for regressions in the case of std=TRUE (the other stats were correct, but the t and p values of the betas were wrong) \item Fixed a bug in error.bars that led to not drawing catseyes when importing using the stats option. Reported by Niall Bolger \item Fixed read.clipboard.upper so that it works properly with names=TRUE \item Fixed a bug in fa.parallel that would not report just fa=fa in the case of correlation matrices (reported by Aaron Weidman) } } } \section{Changes in psych version 1.5.8 (2015-08-29)}{ \subsection{Additions}{ \itemize{ \item Added rot.mat as an output to the fa and principal objects. Cleaned up the fa and principal rotations so they all work. \item Added amplitude, mean, and intercept to cosinor } } \subsection{Bugs Fixed}{ \itemize{ \item alpha: fixed the warning about reverse keying when keys have been specified (reported by Marc Heerdink) \item Fixed the LaTeX bug in the vignettes (I was trying to float htdp instead of htpb) (as requested by CRAN) \item Fixed the LaTex bug in df2latex (same problem) } } } \section{Changes in psych version 1.5.6 (2015-06-20)}{ \subsection{Additions}{ \itemize{ \item statsBy: Added the ability to find within group and between group covariances instead of just correlations. This also allows the type of correlation to be specified. \item mediate: Can now do more general mediation of x -> M -> Y for multiple X variables and multiple M variables. \item mediate.diagram and moderate.diagram are now more functional \item fa: Added the option to reject weird solutions when doing iterations of factor solutions (suggested by Vencislav Popov) \item fa, principal: Improved documentation of principal and fa to explain the com variable. \item alpha: Changed the default for check.keys to be FALSE. This will result in a warning saying that you probably want to reverse keys, and tells you how to do so. In response to comments by Oliver John saying that we were making it too easy to find alpha. \item dia.arrow: Added the both option to allow curved arrows to have heads at both ends (useful for setCor and mediate diagrams) \item plot.irt and plot.poly: Added the option to plot different colors for each item. (requested by Yanna Weisberg). \item Explicitly declared all functions input from the stats, graphics, etc. cor R packages. \item pairs.panels can now draw multiple plot characters (see the second iris example). \item faBy added a free or fixed option to force loadings to the pooled solution (if possible) } } \subsection{Bugs Fixed}{ \itemize{ \item Fixed fa so that the bifactor, TargetT, equamax and varimin rotations return correctly (they were returning the unrotated solution). (Reported by Matthias Bellmann.) \item describe: changed describe so that it will give (with *) stats for factors (replacing a functionality that disappeared sometime ago) The resolves the problem reported by Alan Kelly. \item Fixed a bug in fa2latex so it will boldface "big values" and cut small values instead of one or the other. (Reported by Simon Kiss) \item df2latex and fa2latex: corrected the location of font.size to be inside the table to work correctly (Reported by Simon Kiss). \item pairs.panels the cor option now works correctly (as specified in the help file) when showing the regression line (it had been reversed) } } } \section{Changes in psych version 1.5.4 (2015-04-15)}{ \subsection{Additions}{ \itemize{ \item principal: Fixed principal to be compatible with changes in fa wrt rotations \item Fixed schmid and omega to compatible with all rotations \item Modified setCor and mediate so that they just read in the relevant variables when finding the covariance matrix. \item Added a simulate data option to mediate to handle mediation models based upon covariance matrices. \item Added a density option to error.bars.by so that transparency is easier. \item Cleaned up faBy to handle missing data \item Modified setCor so that variables can be specified by name or number. \item Modified scoreOverlap to handle the case of NAs in the correlation matrix. \item Added the ability to find and print item by scale correlations (corrected for item overlap and scale reliability) in scoreOverlap. \item Added the ID number used for grouping to be the rowname for the output in cosinor (requested by Ashley Kendall) \item Added circadian.stats and circadian.F to do basic circadian stats and group comparisons. \item Added circadian.sd to work with circadian.mean to allow t-tests. \item Added circadian.reliability to find the split half reliability of phases found by cosinor or circadian.phase. \item Renamed best.items and best.scales to bestItems and bestScale. Change the call to be more consistent. \item Added two new features to pairs.panels. One allows it to handle character variables (which will be translated to numeric levels) as requested by Richard Cotton. Added the ability to specify the number of breaks in the histogram as requested by Philipp Thomas. Also added the ability to change plot character size independently of correlation print size (cex.cor). \item Did a drastic rewrite of the pairs.panels function so that is much cleaner (partly in response to a bug reported by Valdar Tammik and a request by Richard Cotten). \item treat character as factor in pairs.panels. Requested by Richard Cotten. \item Modified dia.arrow and het.diagram to allow control over the gap and label sizes. (Requested by David Condon). \item Added a MAR parameter to cor.plot so long labels will fit \item Changed the output of the principal function to not give MLE chi square based fit statistics. (Which really did not make sense.) \item Added the sim.correlation function to simulate sample correlation matrices from a population correlation matrix. \item Added invisible return from fa2latex and df2latex to allow for capture as a file to put into a latex document. (Requested by David Condon.) \item Did a major rewrite of fa.parallel to make the structure more clear and to allow for choosing mean or quantile values of the eigen value comparisons. \item Modified fa so that it allows (without warning) for smcs in case nf > n.var/2. This case will still report a df problem in terms of fit statistics. It will still throw a warning if using an oblique transformation. \item Modified splitHalf so the splits reported are keyed appropriately if some items need to be reversed. \item Added the cta.15 function to lock down cta as published by Revelle and Condon. cta is now the development version, cta.15 is the version reported in R and C. \item The smc function has been modified for the case of correlation matrices with some missing values. SMCs are found based upon the complete correlations, and alternative estimates are found for those with missing values. \item Added warning message to fa, principal, and iclust so that they stop if there are NA values in the correlation matrix. } } \subsection{Bugs Fixed}{ \itemize{ \item Fixed mat.regress bug by implementing it as part of setCor. \item Fixed faBy to handle the case of bad data more elegantly \item Fixed a bug in sim (seemingly introduced when I changed the way of finding simulated correlation matrices. Bug also affected the other sim.functions) \item In factor.stats, sometimes the RMSEA limits do not include RMSEA. Rather than reporting non-sensical values, just report NAs. \item fixed bug in bestScales that would cause an error if items were missing in the dictionary. \item Fixed a bug in fa.plot where it would not handle data without a title. \item Fixed a long standing problem in biplot to allow multiple graphs in the same window, and then, discovered and fixed a problem with the margins getting progressively smaller. \item Fixed a bug (reported by Isabelle Rivals) in cohen.kappa (just for weighted kappa) when working with raw data with more than 9 categories. \item splitHalf failed for n > 32. Fixed. (Thanks to Tom Booth for having the data set that produced this error.) \item test for weird correlation matrices failed in the presence of NA fields in scoreOverlap (reported by Lorien Elleman). Fixed. } } } \section{Changes in psych version 1.5.1 (2015-01-20)}{ \subsection{Additions}{ \itemize{ \item Added the cor option to fa so that it can find tetrachoric, polychoric, or mixed correlations. Cleaned up the fa code a bit by using switch instead of convoluted if statements. \item changed cor.smooth to detect any eigenvalue < 10^-12 This had been 10^-15 \item Added the Bonett generalization to the Yule coefficient and added confidence intervals as well. (Suggested by Barry Dwight). \item Changed tetrachoric and polychoric to use sadmvn from the mnormt package instead of mvtnorm. This seems to result in a speed up of a factor of 2! For complete data, the results match that of lavCor (from lavaan) exactly (or at least to the 3 decimals that lavCor reports) but seem to be four times faster for dichotomous data and 20\% faster for polychorics than lavCor. \item Added a message to fa if a Heywood case is detected. Suggested by Sagnik Chakravarty. \item Added a covariance option to omega and to schmid. Requested by Qingping He. \item Modified describe to add a fast option for larger data sets. Added a discussion about how to speed up descriptions for very large data sets. \item Added mediate function to estimate mediation and moderation models. (requested by Dan Molden). \item Added mediate.diagram to show the results of mediation models. \item Added regression.diagram to show regression and set.cor models. \item Modified phi.demo to compare phis with tetrachorics and Yules. \item Modified fa.parallel to allow it to use tetrachoric, polychoric, or Yule coefficents (similar to the change in fa). \item Added a number of if(requireNamespaceGPArotation) GPArotation:: blah blah to make it more compatible with the new namespace checking and error complaints \item Modified scoreOverlap to allow for finding correlations from any non-square data matrix (had been limited to case of non-square where the data were outside a 0 1 range). \item Added documentation to alpha and scoreOverlap justifying the r.cor statistic. } } \subsection{Bugs Fixed}{ \itemize{ \item Corrected cor.ci to fix a capitalized rho (which led to a failure if not using keys. And, then more importantly, fixed it again so that it will work for overlapping scales as advertised. \item Corrected r.test to use df = n-3 instead of n-2. Thanks to Bruno Ernande for reporting this. \item Corrected the documentation to fa wrt oblique.scores to correctly represent what it does (I had had it backwards) Thanks to Mark Seeto for reporting this. \item Added a message to principal if rotation is not correctly specified and defaulting to "none". Suggested by Sagnik Chakravarty. \item Fixed describeData so that it doesn't choke on string variables. \item Modified the print.psych function so that irt information is printed for every unit rather than every .1 unit. The data are still there in the plot.info object. \item Fixed cor.plot to properly add labels to the plot when plotting factor loadings. \item Fixed phi.demo to compare phis with tetrachorics and Yules. \item Fixed a print problem for fa.parallel \item Fixed a rare case in fa.stats for the case of singular matrices \item Fixed the way missing is handled in fa so that it actually works! (problem found by Elizabeth Barrett-Cheetham) \item Modified error circles to pass just the x and y values to statsBy. } } } \section{Changes in psych version 1.4.8 (2014-08-10)}{ \subsection{Additions}{ \itemize{ \item Added the ability to find polychoric correlations within groups in statsBy (requested by Aidan Wright). \item Modified cor.ci to allow for cluster overlap corrections. \item Added het.diagram to allow for diagrams of heterarchical structures. \item Modified dia.arrow to allow for adjusting where the label appears. Applied this fix to fa.diagram. \item Modified fa.sort so that it will sort the results of a principal components analysis (requested by Popov Oleg). \item Added the outlier function to find Mahalanobis distance measures for a data matrix and flag those cases that are unusual. \item Added draw.cor to show how bivariate densities estimates are used in finding tetrachoric correlations. \item Added faBy to allow for factor analysis by groups. \item Removed inactive calls to the polycor package } } \subsection{Bugs Fixed}{ \itemize{ \item Fixed alpha so that it properly returns the average correlation if given a correlation matrix (reported by Franziska Zuber). \item A stylistic change in tetrachoric so that it is easier to read. \item Fixed two examples in tetrachoric that were impossible \item Changed a test condition that was not allowing vector input to be processed in tetrachoric. \item Fixed a bug that would lead polychoric to break if there were no cell entries (bad data condition). Missing cells are now given NA. (Reported by Elina Zaonegina.) } } } \section{Changes in psych version 1.4.6.20 (2014-06-20)}{ \subsection{Additions}{ \itemize{ \item Added read.https function \item Added the ability to specify group by name or variable location to describeBy, error.bars.by, violinBy, mssd and rmssd. \item Added the equamax rotation option in fa. (requested by Sagnik Chakravarty, with solution by Gunter Nickel) \item Added the ability to do factor.extensions for omega analyses. \item Added some error checking in polychoric for case of bad data (requested by Simon Kiss) \item Added the ability to specify the base pch in error.bars.by (requested by Tham Tran) \item Modified tetrachoric and multi.cor to use correct = .5 (that is, to make the correction for continuity a variable rather than a logical to do or not do corrections.) In addition, modified multi.cor to do polytomous by dichotomous variables using a generalization of polychoric (polydi). \item Modified plot.irt and plot.poly to work together better and to allow specifications of the xlim and ylim across plots. \item Added histBy to draw multiple histograms on a single plot \item Added scoreOverlap to adjust correlations between overlapping sets of items. \item Modified error.bars.by to draw semi-transarent catseyes and to allow for settings of pch, lty, and color for each variable. \item Modified fa2latex to boldface abs(loadings) > cut. \item Added the varimin rotation from Suitbert Ertl to principal and fa. } } \subsection{Bugs Fixed}{ \itemize{ \item fixed print.psych.vss to properly print the minimum eBIC value. (reported by Lorien Elleman) \item made x a matrix in mssd to fix a problem of finding mssd of a single variable (reported by Aidan Wright). \item fixed con2cat so that it will handle missing data (correctly) \item Modified error.bars.by so that it will not break if there is only one case in a group. } } } \section{Changes in psych version 1.4.5 (2014-05-11)}{ \subsection{Additions}{ \itemize{ \item Added con2cat to allow making discrete categories from continuous data. \item Modified the cats eyes in error.bars so that colors can be specified (requested by Lauren Tindal). \item Modified set.cor so that calls by name or location work equally well. \item Added the option to change the size of the correlation numbers in cor.plot to reflect the probability values. \item Added a default so that cor.ci automatically draws the scaled cor.plot (requested by David Condon). \item Changed the structure of the fa return when we have confidence intervals. It has been forming another fa object, but this leads to confusion when sorting. \item Modified print.psych.fa.ci so that it handles the revised fa output. \item Modified vss so that it doesn't find factor scores on each iteration. \item Added the plot.cor.upperLowerCi function to plot the confidence boundaries for correlations (requested by David Condon) \item Modified fa.sort so that it will handle confidence intervals for loadings (requested by Jason French). \item Modified fa2latex to boldface significant loadings based upon the confidence intervals (requested by Jason French). \item Modified score.irt so that it will change the sign to match the direction of the keys (if supplied) \item modified error.bars to that x axis labels can be rotated to be vertical (requested by Greg Miller). } } \subsection{Bugs Fixed}{ \itemize{ \item Fixed cor.ci so that it handles case of missing correlations (reported by David Condon) \item Modified plot.irt so that labels are applied for multiple factor solutions (reported by Jason French). \item Modified fa2latex so that it will not blank out h2 and u2 values for cut >0 (suggested by Daniel Zingaro). } } } \section{Changes in psych version 1.4.4 (2014-04-15)}{ \subsection{Additions}{ \itemize{ \item A few tweaks to corr.p to be compatible with corr.test \item Improved statsBy so that it can properly handle two grouping variables. statsBy also labels the columns in the within object. \item Added pairwise names for factor intercorrelations in print.psych.fa.ci. } } \subsection{Bugs Fixed}{ \itemize{ \item Two bugs were introduced into corr.test in version 1.4.3. Both fixed. (reported by louis-charles vannier and Clemens Fell) \item Fixed print.psych.fa so that it properly sorts the complexities (bug reported by Kai Horstman) \item Fixed fa so that if a non-available rotation is requested, a warning is issued. \item Fixed alpha for case of 2 variables. \item Fixed the naming of correlations in cor.ci \item Fixed the empirical confidence intervals for alpha in alpha. } } } \section{Changes in psych version 1.4.3 (2014-03-24)}{ \subsection{Additions}{ \itemize{ \item Added best.scale to empirically combine the best items for a scale to predict particular criteria. \item Changed the call to best.items to match fa.lookup. \item Added a warning for cor.ci if some of the correlations are NA as would be the resulting scales. Will still find the other correlations but be careful. \item Added empirical estimates of fit to the fa and fa.stats functions. These are more robust to misfit for matrices that are not positive definite. (Requested by David Condon and Lorien Elleman). \item Added these empirical estimates of fit to vss. \item Added the nfactors function to do a call to vss and then do nice graphics \item Added fa.organize to hand sort factor output by factor number and by item order. \item Added item.lookup to organize items by factor and then by item mean. \item Added densityBy to show violin plots. Will do this by groups. \item Added cats eyes to error.bars and error.bars.by. \item Added normal theory confidence intervals to corr.test. (Suggested by Alexander Weiss) \item Added the ability to just resample and not simulate in fa.parallel. (Requested by Ashley Kendall.) \item Fixed fa.parallel and fa.parallel.poly for case of very low frequency data. We now resample until we get a matrix with no NA values. (Problem reported by Eric Green). \item Added a modification to error bars to print a dark circle at the mean. ( Suggested by Jaroslaw Arlet.) \item Add significance and confidence intervals to partial.r (Suggested by Elizabeth Schubach). } } \subsection{Bugs Fixed}{ \itemize{ \item cor.plot was incorrectly plotting the numbers by col instead of row. Just a problem for asymmetric matrices. Reported by David Condon. \item corr.p was not printing correctly. Reported by Robin Beaumount. } } } \section{Changes in psych version 1.4.2.1 (2014-02-03)}{ \subsection{Bugs Fixed}{ \itemize{ \item Fixed print.psych.fa to cover several weird cases of no x$R2 or stats.df that were causing HDMD package to fail. } } } \section{Changes in psych version 1.4.2 (2014-02-01)}{ \subsection{Additions}{ \itemize{ \item Added the epi data set and epi.dictionary of items. \item Added bfi.dictionary to show the item contents of the bfi example. \item Added BIC, SABIC, RMSEA, and complexity for each factor model in VSS (vss) (requested by David Condon) \item added the alias of fa.congruence and fa.stats as calls to factor.congruence and factor.stats in a slow move to consistency \item Added fa.lookup to allow for sorted factor loadings with item contents if using a dictionary for the items. \item Modified print.psych.fa so that warnings about how factor score correlations are improper are suppressed (as is the printing of the improper correlations). } } \subsection{Bugs Fixed}{ \itemize{ \item defined global=TRUE as default in fa.parallel.poly (reported by Koji E. Kosugi) \item changed if(max(R2 > 1) to if(any(max(R2 > 1)) in print.psych.fa (reported by David Condon) \item Fixed the se calculation in describe which had been incorrectly changed in version 1.4.1. Thanks to Terry Jorgensen and others for reporting this. \item Fixed fa.sort so the that communalities and complexities are sorted as well } } } \section{Changes in psych version 1.4.1 (2014-01-20)}{ \subsection{Additions}{ \itemize{ \item Substantial improvements in speed through the use of the parallel package and some coding improvements. \item Added parallel processing (multicores) to polychoric, tetrachoric, and mixed.cor. With 2 cores this saves 50\%, but this is not a linear function of the number of cores: with 4 cores the savings seems to be 67\% and with 8 cores 75\%. Requires the parallel package which seems to be supplied with core R. Apparently this benefit will not help those running PCs which don't implement forking. \item Added parallel processing using multicores to fa when calculating confidence intervals. Basically changed the interation loop to an lapply and then made that a mclapply. \item Added parallel processing using multicores to fa.parallel, cor.ci and alpha (for the empirical bootstrap). \item Yet another speed improvement by introducing tableF: a cut down/speeded up version of table. This works only for tabulating two integer vectors, but by reducing error checking, is at least twice as fast. This is used in particular in polychoric and tetrachoric, and therefore in mixed.cor as well. \item Changed fa so that confidence intervals are raw rather than fisherz transformed before averaging. \item Modified ICLUST to increase speed by about 50\% by doing the smcs once instead of every iteration. This does not use multicores so the savings will be for PCs as well as OS X and Unix. 50\% of time is now spent doing matrix multiplication -- can we speed this up by not rescoring all clusters each time, but rather doing incremental changes? \item Minor tweaks to increase speed when creating lists and matrices. \item Added omega2latex and modified fa2latex to allow not printing small values. \item Added ICC2latex. \item Changed score.items to scoreItems in a continuing switch to camelCase. score.items will eventually be deprecated unless the entire score. functions are integrated somehow. \item Added option to sim.congeneric (and others) to do multiple simulations with same theta values. (Actually, it was there all along. Just set.seed(some value) before each run). \item A minor tweak to r.test to better report the call in the case of a correlated correlation. \item A minor tweak to corr.test so that if all the sample sizes are the same, just report one number. \item Reduced the number of executable examples to speed up compilation. The examples are just commented out. Worth running for understanding. \item Added the Gleser data set for an example of generalizability theory. \item Added the ability to find scores automatically in omega. \item Added the ability data set (just the iqitems data set scored for correct/incorrect). This is more convenient for demonstrations. \item Added the check option in describe (turning it off increases speed drastically, but at the risk of not detecting logical or factor data). \item Added the describeData helper to do a quick pass to list the data types in a data.frame. \item Rewrote the summary function to use switch rather than a series of ifs. \item Added a parameter to factor.scores to allow the data and a correlation matrix to both be passed for scoring. \item Added Structure as an output for principal (requested by Emeka Egbuna) \item Added best.items and lookup as two helper functions. } } \subsection{Bugs Fixed}{ \itemize{ \item sim.cor.poly -- fixed so that it actually works. \item Fixed alpha so that it will properly reverse key if given correlations. (Not sure when this stopped working.) \item Corrected the sign of at.rest in the TA scoring key for the msq.Rd file \item Corrected reverse.code to not add a constant to positive items (reported by Jian Jin) \item Perhaps finally fixed pairs.panels so it will not change options for graphics window. \item Fixed polychoric so that it will not blow up if the number of response alternatives are not equal. (Reported by Jeanette Lim ) \item Fixed fa.poly so that the scores are calculated based upon the polychoric correlations rather than the Pearson correlations (based partly on trying to solve another problem reported by Jeanette Lim) \item Fixed scoreItems for the case of an unnamed keys vector (reported by Sara Weston) } } } \section{Changes in psych version 1.3.12 (2013-12-10)}{ \subsection{Additions}{ \itemize{ \item Added a brief wrapper function (corFiml) to call FIML procedures taken from lavaan. Inspired by some code from Joshua Wiley and with the help of Ashley Brown. The lavaan functions necessary for this are not exported from lavaan and hence have been partially moved (with adaptations) to psych. \item Completely modified guttman to use the splitHalf function. This provides much more accurate estimates of the greatest lower bound. \item Added the splitHalf function to find all possible split halves of scales of up to 16 items, and to sample repeatedly random splits for more than 16 items. \item Modified plot.parallel.poly and fa.parallel.poly to allow choice in plotting between both, fa, and pc. (Matches the fa.parallel function). \item Added the weight option to fa.poly, polychoric, tetrachoric, and mixed cor. This allows for cases to be given unequal weights. Requested by Fabricio Flalho. \item Enhanced the documentation for irt.fa and plot.irt to explain when to use plot.irt vs. plot.poly (Requested by David Condon). \item Enhanced cor2latex with the help of Davide Morselli. Now can find correlations directly and can also (if desired) show probability stars. \item Added standard errors and 95\% confidence boundaries to alpha and score.items (suggested by Doug Lawson) \item Minor tweaks to irt2latex and fa2latex. } } \subsection{Bugs Fixed}{ \itemize{ \item Corrected the documentation to fa to specify that it allows targetT and targetQ rotation. \item Corrected kaiser to return as class "psych" and "fa" so that solutions can be used in fa.diagram. (Reported by Gouri Shankar Mishra) \item Corrected alpha so that it reports r.drop in the case of working from a correlation matrix. (Reported by Nicolas Hubner) } } } \section{Changes in psych version 1.3.10.12 (2013-10-12)}{ \subsection{Additions}{ \itemize{ \item A serious speedup to tetrachoric and polychoric was initiated with the help of Jason French. The increase in speed is roughly 1- (nc-1)^2/nc^2 where nc is the number of categories. Thus, for tetrachorics where nc=2, this is a 75\% reduction, whereas for polychorics of 6 item responses this is just a 30\% reduction. \item Added the ability to rotate the numbers in cor.plot by specifying srt. (Thanks to a suggestion on Stack OverFlow by shujaa). \item Added the use parameter to the fa function to allow for other than pairwise correlations. \item Added an invisible return from the print.psych.fa function to include the variances accounted for and the cumulative variance accounted for. (Requested by Eric Green). } } \subsection{Bugs Fixed}{ \itemize{ \item Fixed a bug in print.psych.fa so that it did not always work with the HDMD package. \item Modified fa.parallel.poly so that the progress bar is not called during the tetrachorics or polychoric simulations. \item print.psych.fa did not work for covariance input when adding the complexity option. } } } \section{Changes in psych version 1.3.10 (2013-10-03)}{ \subsection{Additions}{ \itemize{ \item Added Yule2phi and Yule2tetra to convert Yule Q coefficients to phis or tetrachorics. Completely rewrote Yule.inv and Yule2phi. \item Added sim.poly.mat to generate polytomous or dichotomous items given a particular item structure. \item Modified describeBy to allow specification of digits in the matrix output. \item Modified multi.hist to allow one more control over graphic output. (Requested by Lars Carlsen). \item added iclust.sort to documentation of iclust. \item Added label option to fa2latex (requested by Robert Carlisie) \item Added sanitize.latex function to all the latex functions so that they properly escape underscore and ampersand. \item Added method option to scatter.hist to allow for other correlation methods \item Modified tetrachoric and polychoric so that (0,1) and (1,2) data can be mixed together. tetrachoric and polychoric agree for dichotomous data (as they should) \item Added an option to the mixed.cor function to allow for spearman or kendall correlations. \item Added Hoffman's complexity index for the factor analyses output \item Modified make.keys to allow addressing items by name rather than location. \item Modified alpha to allow for specify items to reverse by name or location. \item Modified the msq data set Rd file to address by name rather than location \item Minor tweaks to fa.plot and cluster.plot to allow more graphic control \item modify structure diagram so that the error arrows are to the left for lr printing \item Modified fa.plot and cluster.plot to allow positioning and sizing of item labels for two dimensional plots. \item Made sure all usage and example lines are less than 90 characters. \item Added the ability to calculate factors scores in fa.poly. \item Significant improvement in biplot.psych to allow much greater control over plotting. \item Added cor.ci to find bootstrapped confidence intervals for raw and synthetic correlations. \item Added group reliability calculation for omega. \item return (invisibly) the eigen values of the factors and components from scree. \item Added unit weighted multiple correlation and unit weighted set correlation to set.cor. \item Added test.all to allow for testing whether other packages work (or at least their examples) when psych is loaded. (Can be used for any set of packages.) \item Added the ability to simulate as well as resample dichotomous data in fa.parallel.poly (requested by Balal Izanloo). \item Drop (with a warning) items that have no variance in alpha (requested by Eric Green). This feature has also been added to score.items. \item Modified upperLower so that the rownames come from the lower diagonal matrix, and the column names from the upper diagonal matrix. } } \subsection{Bugs Fixed}{ \itemize{ \item fixed statsBy so that it correctly reports the pooled values (reported by John Rauthman) \item corrected a problem with fa that was leading to errors when handling very non-positive matrices \item fixed a bug in Yule.inv that was leading to impossible values being reported \item fixed the way that biplot.psych returns the op$mfrow parameter \item applied the same fix to pairs.panels to perhaps fix the recurring problem \item Fixed fa.parallel to not break if just principal components output is requested \item Identified and fixed bug in mixed.cor for case of improper data. \item Identified and fixed? bug in mixed.cor so it will treat randomly mixed data \item Corrected corr.test and cor.p to properly pass the adjust option (reported by David Weisman and Russell Pierce) \item Corrected error.bars, error.bars.by and errorCircles to use n-1 for df in the call to qt. Modified the documentation to point out that we are using the t-distribution. (Thanks to Trevor Dodds) \item Fixed fa.parallel so that it correctly prints out the legend for the case of fa="pc" or fa="fa" (reported by Andrew Hsiao and others) \item Corrected the way confidence intervals were found in fa.poly \item Correctly name the scored variables in case of impute = 'none' (reported by David Condon \item Complains, rather than blows up when doing irt.fa and plot.poly for Heywood cases. \item Added column names to factor scores. \item Corrected the way that data are randomized in the fa.parallel.poly function so that each variable retains (roughly) the original difficulty. Also did this for fa.parallel. Reported by Balal Izanloo. \item Fixed (?) progressBar so that it does not freeze the R.Gui for Mac. } } } \section{Changes in psych version 1.3.2 (2013-02-28)}{ \subsection{Additions}{ \itemize{ \item Cleaned up help files so that lines don't over run the page in the pdf version. (Suggested by Jennie Miller). Still seems to be a problem. \item Fixed factor.stats so that it does not complain when finding scores for principal \item Fixed fa2latex so that the apa option correctly puts in the last cr \item Added ECV to omega to give an estimate of unidimensionality \item Add a parameter to set.cor and mat.regress to allow for square data matrices (suggested by Thomas Richardson). \item Changed the default in principal to find scores (scores=TRUE). This makes it compatible with fa. } } \subsection{Bugs Fixed}{ \itemize{ \item Fixed principal so that it labels the components as rotated or transformed (as documented and previously implemented. Somehow this feature went away. \item Fixed fa so that specifying SMC=FALSE will replace the diagonals with 1s instead of 0s. (Seems to have been an error introduced when the SMC= a vector option was introduced) \item Minor bug fix to sim.omega so that it will not choke on badly structured sem commands. } } } \section{Changes in psych version 1.2.12 (2013-01-20)}{ \subsection{Additions}{ \itemize{ \item Added fa2irt to convert factor analysis output to Item Response Theory type output. (Basically doing what irt.fa already does, but from a separate factor analysis. \item Added fa.extend to make factor extension easier to do. \item Added an overall goodness of fit tests for fa.extension applied to the extension variables. \item Cleaned up factor.stats to make it do fm="minchi" more readily. \item Added two trivial helper functions r2d and d2r to convert from correlations to effect sizes and back. \item added the von Neumann test of successive differences (mssd and rmssd). This is useful for studying within subject changes in mood. \item Changed the help files for test.psych, irt.fa, iqitems to dontrun the examples (cutting down time to compile and test the functions) } } \subsection{Bugs Fixed}{ \itemize{ \item fix the output in the example for fa. Fixed by noticing that: \item in fa, fm="pa" did not return labels for the factors. \item in statsBy, fixed a problem if the by returned NULL values for some group values \item Finally fixed bug in describeBy for the case of NULL categories for the by variable. (Reported by Nipa Phojanamongkolkij). \item fixed bug in smc for case of missing values of diag(smc) \item added an error message to cor.smooth for the case of NA eigen values \item Changed the random seed in overview.rnw for the example of omegaSem to 17 (from 42). The previous seed was generating a bad omega solution which in turn caused sem to fail. \item fixed the call function in ICLUST to properly report the parameters being called. } } } \section{Changes in psych version 1.2.11 (2012-11-22)}{ \subsection{Additions}{ \itemize{ \item Added the rangeCorrection function to correct for range restriction. \item Added a new factor method (minchi) to the fa function. This weights residual correlations by the pairwise sample size. The minimum weighted residual is then found. This will give OLS (minres) solutions for the case of equal sample sizes, but slightly better (weighted) fits for unequal sample sizes. Most appropriate for the Massively Missing Completely at Random structure of SAPA data. \item Added a measure of the Kaiser-Meyer-Olkin index of factoring adequacy } } \subsection{Bugs Fixed}{ \itemize{ \item None yet } } } \section{Changes in psych version 1.2.8 (2012-08-25)}{ \subsection{Additions}{ \itemize{ \item Updated the overview vignette \item Added the ability to do target rotations (TargetQ) in omega. \item Improved documentation of fa to explain how oblique SS are found. \item A better set of items is used for the iq example (iqitems). \item add CVE estimate to fa (see Reise 2010) (had already done this for omega). \item Changed ICLUST.sort so that it can sort the loadings of principal (requested by Gudmundur Arnkelsson and then print them with the number of digits requested in the print command. \item Modified score.irt so that subjects who miss all items or pass all items are given an estimate based upon the (product) of the difficulty of the items they miss (pass) and then adjusted based upon half the quantile difference from 0 (if they miss all items) and 100 (if they pass all items). \item Modified sim.omega to allow for specifying a general factor. This allows for tests of not just the bias in the case of no general factor, but also the ability to detect a general factor. Also modified it to include calls to omegaSem. \item modified iclust.diagram so that cluster names can be specified rather than all start with C1 ... Cn (requested by Michael Kubovy) \item Slightly improved the documentation for r.test so it is clear which correlation is which. \item Modified iclust so that the fit statistic is based upon the off-diagonal elements unless otherwise specified. Use diagonal =TRUE to get fits matching previous analyses. \item Modified the print.psych.iclust function to print out the Root Mean Square Residual correlation. (It was previously reported in iclust output, but not printed.) } } \subsection{Bugs Fixed}{ \itemize{ \item fixed a bug in score.items, such that if the number of subjects is equal to the number of items, no scores were returned. Reported by Jeromy Anglim. \item Modified factor.stats so that rare condition of an exact fit in omega does not lead to an error. \item Fixed plot.irt so that item labels are correct \item Modified omegaSem to pass n.obs to sem \item Fixed df2latex so that it will on text variables as well as numeric variables } } } \section{Changes in psych version 1.2.7 (2012-07-31)}{ \subsection{Additions}{ \itemize{ \item Improved statsBy to find within and between group correlations. \item Added a data set, withinBetween, to graphically show the issue of multilevel correlations. \item Added a simulation function to generate multilevel data (sim.multilevel) \item Added a function (irt.responses) to plot responses as a function of the latent score for multiple choice alternatives. \item Modified progressBar so that it only shows dots when using the console, but not when using a text file for output (i.e., with Sweave). } } \subsection{Bugs Fixed}{ \itemize{ \item fixed plot.poly and plot.irt so that multiple scales for one factor can be plotted. } } } \section{Changes in psych version 1.2.6 (2012-06-20)}{ \subsection{Additions}{ \itemize{ \item Slight modification to cor.plot to allow for better control of multiple plots on the same page. \item Added the ability to put values into the cor.plot output (numbers=TRUE) \item Added a function (lowerUpper) to combine two symmetric matrices and output one as the above diagonal entries, and the other as the below diagonal entries of a square matrix. \item Added a function (topBottom) to act like headTail but to do it without ellipses. \item In a gradual switch to camelCase, changed the name of headtail to headTail, and describe.by to describeBy. Warning messages are issued about the change. \item Added a function, statsBy, to find summary statistics (means, sample sizes, standard deviations) by a grouping variable. Similar to describeBy but somewhat easier to use. Also will report (as an option) the pooled within group correlations. \item Added a function to find sample size weighted correlations, cor.wt. Useful for SAPA types of analysis. \item Modified superMatrix to take list input so that more than two matrices can be combined at once (suggested by David Condon). \item Added errorCircles, a greatly improved version of error.crosses. \item Added the option to specify starting communalities in the fa function (requested by David Gosar). } } \subsection{Bugs Fixed}{ \itemize{ \item cor.plot was actually switching row and colnames (but since it is mainly used on symmetric matrices, this was not a problem). Fixed. \item principal failed when handling missing data in data.frame. Fixed. (reported by Neil Stewart) } } } \section{Changes in psych version 1.2.4 (2012-04-30)}{ \subsection{Additions}{ \itemize{ \item (Note that 1.2.3 was not released but was replaced with 1.2.4) \item fixed ICLUST so that it converts covar matrices to correlations. \item Modified pairs.panels so that the method of correlation can be specified (suggested by Carsten Dormann) \item added error values to the structure.diagram/lavaan diagram as an option (suggested by José Luis Cañadas ) \item Changed the oblique.scores option in fa to be FALSE (use the structure the matrix) following a request by Niels Waller. \item Fixed the size of the boxes in dia.rect so that omega.diagram and fa.diagram provide reasonable size boxes for long names (requested by Erich Studerus) \item modified output of corr.test so that it announces whether or not it is adjusting the probability values for multiple tests. \item added the ability to export dot.plot commands without requiring Rgraphviz. (requested by Erich Studerus). Although just implemented for fa.graph, omega.diagram functions can be exported using fa.graph applied to the schmid$sl object from omega. \item Added 3 options to the skew and kurtosis functions, and thus to the describe and describe.by functions as well. (Suggested by Bruce Dudek). \item Added the ability to plot general factors (ala omega) in the fa.diagram function. \item Added the function lowerCor to find correlations (with pairwise deletion as a default) and print out the lowerMat of the results. } } \subsection{Bugs Fixed}{ \itemize{ \item A documentation error in r.test was fixed (reported by Nicholas S. Holtzman). The example of Steiger case B was incorrectly ordered and thus led to incorrect results. Specifying by name (rather than location) is now recommended. \item In fa, the rotation was not returned, fixed. (reported by Niels Waller) \item predict.psych would not work for a single case (reported by Jonathan Williams). Fixed. Also made the documentation a bit clearer to explain the way the factor scores are standardized. \item By adding a global-local parameter to mixed.cor it now seems to work on those data sets with badly distributed polytomous data. (Reported by Eric Smith and David Condon.) } } } \section{Changes in psych version 1.2.1 (2012-01-31)}{ \subsection{Additions}{ \itemize{ \item Modified scrub to be a bit easier to use (suggested by Josh Wilt) \item Added a check in mixed.cor so that if the data are incorrect, announce it first, rather than waiting until late in the process. \item Added a progress indicator to particularly slow functions (e.g. tetrachoric, polychoric, biserial). This will show up when using these functions in other functions (such as irt.fa or mixed.cor). \item Modified cluster.cor so that in the case of correlations that are NA, it will not find smcs of the raw correlation matrix. In addition, it will impute scale level correlations based upon the average between scale correlation (previously it was reporting an NA). Requested by David Condon. \item Added the helper function lowerMat to print the lower submatrix. Added this as an option to the print.psych function. \item Modified mixed.cor so that it will automatically (if desired) find the Pearson, polychoric, and tetrachoric correlation matrices and then reorganize the matrix back to the original form. \item Minor modifications to tetrachoric and polychoric to give slightly more helpful error messages in the case of missing data and corrections for continuity. \item Minor modifications to summary to give cleaner summary stats for fa and irt.fa analyses. \item Added a residual and plot.residuals function to plot either qq plots of the residuals, or a cor.plot of the residuals. } } \subsection{Bugs Fixed}{ \itemize{ \item Fixed a few references that were working but doing partial matching in print.psych, test.psych, pairs.panels (e.g, in print.psych digit=digits was changed to digits=digits, test.psych nf was changed to nfactors, col was changed to col.smooth in pairs.panels). This had been flagged by linux checking as a note about "partial matching" for a long time. Finally figured out what it meant. \item corrected biplot.psych to properly take the xlim and ylim parameters (reported by Andreas M. Brandmaier) \item Correction to tetrachoric (and related functions) to return NA in the case of no cases for x or y. (Reported by David Condon) \item Fixed fa so that the score.oblique option actually works. (Reported by Jessica Jaynes). } } } \section{Changes in psych version 1.1.12 (2011-12-30)}{ \subsection{Additions}{ \itemize{ \item Modified scrub to be more general. \item Added factor structure as an object in the fa function and modified the print.psych.fa function to note that it is the pattern matrix being reported. \item Modified the ICLUST output so that the eigenvalues reflect the independent contribution of each cluster. \item Added a comment and an option to ICLUST.sort about the order in which clusters are sorted (suggested by Gudmundur Arnkelsson). \item Added the rug option to pairs.panels. This will, by default draw a rug underneath the histograms. \item Added a covar option to principal to find principal components from covariance matrices as well as correlation matrices. \item Cleaned up the objects returned from fa.parallel.poly to more closely match those from fa.parallel. \item Added the ability to find and report residual correlations in set.cor. Minor formatting corrections to set.cor and mat.regress. } } \subsection{Bugs Fixed}{ \itemize{ \item Fixed a bug in fa such that the oblique.scores option did not work. \item fa.parallel.poly was ignoring the fm parameter and was not printing correctly. } } } \section{Changes in psych version 1.1.1122 (2011-11-22)}{ \subsection{Additions}{ \itemize{ \item Added various rotation options to principal to match those in fa. \item Added "components" as an option to factor score to score principal components using just the component loadings. This is most appropriate when not rotating the components. \item Added the Harman.5 (socio-demographic) data set to allow comparisons with SAS. } } \subsection{Bugs Fixed}{ \itemize{ \item The way component scores were found had changed from "regression" to "tenBerge" without being documented. Changed the documentation and added the method of scoring as an option. Problem reported by Alexander Weiss. } } } \section{Changes in psych version 1.1.1111 (2011-11-11)}{ \subsection{Additions}{ \itemize{ \item This is basically a rerelease of version 1.1.10 trying to solve a problem with 32 bit machines and one test that fails. Originally call 1.11.01 but that one still failed the tests. \item Added ... as an option in multi.hist (following a suggestion by Rui Barradas) \item Added the all=FALSE option to test.psych. This turns off one test to avoid a problem in the testing on Solaris 32 bit machines. } } \subsection{Bugs Fixed}{ \itemize{ \item factor.scores and factor.stats were failing in tests for Linux operating systems, but not Macs or PCs. Added some protection against complex eigen values. \item fixed problem in irt.scores reported by David Condon \item fixed irt.fa so it will properly plot for polytomous items } } } \section{Changes in psych version 1.1.10 (2011-10-15)}{ \subsection{Additions}{ \itemize{ \item Started to modify all calls to sd and mean to use apply to respond to their deprecation for data.frames and matrices \item Changed the release numbering system to reflect year and month of release. \item Added a parcels function to form item parcels of size 2 or size 3. \item Added a jitter option to the factor.plot function. \item Added 3 more factor scoring options to the factor.scores function and changed the default scoring option in fa to be the tenBerge \item Added the kaiser function to do kaiser normalization. \item Added the df2latex function to make LaTeX tables. \item Added plot.circular to draw radar and spider plots \item Added a short function to create dummy codes (dummy.code) \item Added a score.irt function to find IRT based scores. \item Added a table of information by attribute level to be (silently) produced in the plot(irt.fa) function. \item Added bifactor and biquartimin as rotation options to fa (based on the Jennrich and Bentler 2011 papers). \item Implemented an improvement to print.psych function to use switch. (Following a suggestion by Joshua Wiley ). This does not change the functionality, but makes the code easier to debug and to change. \item Added an option to alpha to automatically flip items if they seem to be negatively correlated with total score. (Suggested by Jeremy Miles). \item Serious modification to cor.plot to allow for colorRamp data. Makes much prettier correlation plots. (Suggested by David Condon and Joshua Wilt). \item Modified sim and sim.simplex to create State Trait Auto Regressive simplex structures (suggested by Deepika Anand). \item Added a correct option to irt.fa (passes correct to tetrachoric). \item Added the ability for omega to be rerun on the same correlation matrix found by previous omega runs. This speeds up analyses of large matrices but particularly that of tetrachoric/polychoric matrices. \item Modified headtail so that it will not choke on mixed numeric and string data \item Added a smoothing option to the tetachoric correlation function to smooth the resulting matrix if it is not positive definite. \item Added cor.smooth to do a principal components based smoothing for correlations. \item Added global=TRUE option to polychoric (and tetrachoric) correlations. If global=FALSE, each correlation is found using the pairwise taus. This seems to match John Fox's polycor function. If global=TRUE, the taus are the datawise values. This will differ from polycor in the case of a great deal of missing data. Somewhat faster and perhaps more appropriate given that we are trying to find a global solution. \item Added a correction for multiple tests to the p values in the \code{\link{corr.test}} function and added a new function to just report probabilities \code{\link{corr.p}} } } \subsection{Bugs Fixed}{ \itemize{ \item Fixed a very subtle bug in score.items for the case of impute="none" where some subjects have no non-missing data. This affected the estimate of the average number of items (num.ob.item) per scale. Reported by Ben Schalet. \item Fixed bug in factor.stats that gave much too narrow confidence intervals for RMSEA (Thanks to Rick Zinbarg and Ashley Kendall for pointing out the problem). \item revised factor extension to properly handle oblique solutions \item fa now reports the call correctly -- since introduction of iterations it had reported the wrong call \item Fixed mixed.cor so that it can handle several special cases. \item Perhaps finally fixed pairs.panels so it leaves us with the original parameter settings \item Fixed a problem in print.psych such that mat.regress output would not print. (Reported by Ben Schalet.) \item Fixed plot.poly so that it correctly plots the ICC results, particularly for negatively keyed items. \item Fixed fa so that the covar option works. (It had been killed when iterations were introduced.) \item Modified fa, fa.irt, factor.stats, factor.scores so that they handle bad data more gracefully. } } } \section{Changes in psych version 1.0.98 (2011-06-10)}{ \subsection{Additions}{ \itemize{ \item Improvements to set.cor to report the canonical correlations and their chi squares. Also added a plot function to set.cor so that it will plot the eigen values of the canonical correllations. \item Added the cushny data set to show the original drug data from student. } } \subsection{Bugs Fixed}{ \itemize{ \item Resolved why the denomiator df in set.cor does not precisely match the Cohen example. This seems to be because Systat is doing an unbiased estimator and is thus multiplying by (u+v)/v. } } } \section{Changes in psych version 1.0.97 (2011-05-15)}{ \subsection{Additions}{ \itemize{ \item Added a polychoric and tetrachoric option to fa.parallel \item Added a keys option to plot.psych (plot.poly and plot.irt) to allow for plotting information functions for selected items while retaining the factor loadings from the complete set. (Requested by Ben Schalet.) \item Added a summary of the graphic output for irt.fa that includes the average information for each item as well as where the item is most informative. (Suggested by David Condon.) \item Integrated the interactions between fa, fa.poly, irt.fa, fa.diagram, plot, etc. to make commands more consistent. \item added a factor analysis for tetrachoric or polychoric correlations (fa.poly) that will work without doing an irt analysis using irt.fa. \item Added bootstrapped confidence intervals for loadings and correlations in factor analysis (fa), for reliability estimates in omega (omega) and fa.poly.(Originally requested by Steve Miller and Renee Engeln-Maddox for polychoric correlations, extended to the omega case.) \item Modified fa.diagram to include graphic output for fa.extension. \item Added Cohen's set correlation to the mat.regress function. (Requested by Amanda Uliaszek.) \item Changed the order of the parameters in the mat.regress function to be more consistent with the y~x notation of most regression functions. \item Added an oblique.scores option to principal. This means that if oblique rotations are done (not the default) then the scores from principal will be oblique as well. } } \subsection{Bugs Fixed}{ \itemize{ \item None yet } } } \section{Changes in psych version 1.0.96 (2011-04-04)}{ \subsection{Additions}{ \itemize{ \item Factor extension (fa.extension) now can extend an omega as well as a normal factor analysis. \item Modified fa so that the factor scores are found for the oblique factors rather than the orthogonal factors. This now produces factor scores that have roughly the same correlations as do the factors. Factor score statistics remain the same. This leads to a different set of factor scores than factanal finds, for it reports the factor scores for the orthogonal factors. \item Modified response.frequencies to incorporate a very nice suggestion by Joshua Wiley to identify unique item responses that are not necessarily integers. } } \subsection{Bugs Fixed}{ \itemize{ \item Modified fa.extension to correctly find extended oblique loadings. \item check added to omega for bizarre case of 1 factor extracted through PC. (This should not be done, for it makes no sense, but nonetheless, it was leading to omega_h > omega_t.) A warning for this condition is now issued and omega_h = omega_t. Reported by Alison Lewis. } } } \section{Changes in psych version 1.0.95 (2011-03-30)}{ \subsection{Additions}{ \itemize{ \item Fixed News so that it works as a NEWS.rd file \item Added an example to msq \item Added a function to perform factor extension (fa.extension) \item Added the Dwyer and Gorsuch data sets as examples of fa.extension. \item Added lavaan.diagram to the set of diagram functions. Modified to work with lavaan 0.4-7 \item Added scatter.hist to draw a scatter plot and associated histograms and densities. \item Modified score.items so that it reports the number of missing responses for each person for each scale. It had previously reported the total number of missing responses but did not break this down by scale. Requested by Ryne Sherman. Changed the documentation to reflect that missing is calculated. \item Modified error.bars.by so that it can not draw lines (lines=FALSE) if desired. (Requested by Ryne Sherman.) \item Added to error.bars.Rd to describe how to supply means and standard errrors. \item Made various data files available as separate rather than collated. (e.g., Thurstone, Bechtoldt,Holzinger) in the bifactor data set (they remain there as well). (Requested by Michael Friendly.) \item Changed VSS.scree and scree to provide a few more options. \item Cleaned up circ.tests to be more compatible with rest of psych. \item Clean up output from schmid to more match other functions. \item Used tools::compactPDF on the vignettes to make them smaller. Also switched to using png instead of pdf for some of the graphics in the vignettes. This reduced the size drastically. } } \subsection{Bugs Fixed}{ \itemize{ \item fixed error.bars.by so that it draws the alpha level instead of 2 alpha level error bars. \item fixed fa for special case of 3 x 3 matrix with correlations of exactly .5 which would lead to a non-conforming array problem \item fixed partial.r to not round output. For rounded output, the print.psych function defaults to 2 digits (reported by David Freedman) \item Finally fixed the output of print.psych.fa so that it labels sorted factors independent of cut (Reported by Reinhold Hatzinger ) } } } \section{Changes in psych version 1.0.94 (2011-01-01)}{ \subsection{Additions}{ \itemize{ \item Eliminated references to polychor in Yule2poly, } } \subsection{Bugs Fixed }{ \itemize{ \item fixed read.clipboard.upper so that it will read labels \item fixed mat.regress so that it can work with a single predictor } } } \section{Changes in psych version 1.0.93 (2010-12-22)}{ \subsection{Additions}{ \itemize{ \item Added mixed.cor to do mixtures of Pearson, polychoric and tetrachoric correlations \item Added legends to error.bars.by bar graph \item Modified score.items so it will work on correlation matrices as well as raw data. \item Improved the plot.irt and plot.poly functions to plot irt output. Called by plot.psych or generic plot call. \item Added a trivial little function, bi.bars to do paired histograms \item Modified irt.fa to allow for multiple factors \item Modified scrub to be a general recoding function \item Modified error.bars.by to allow for ... parameters to be passed into axis \item Modified diagram so it will dispatch a number of diagram functions (fa, prinicpal, omega, iclust, lavaan). \item Added a biplot.psych function to do biplots of fa or principal results. \item Added an unbiased estimate of kurtosis in the kurtosi function. \item Added a predict function to fa and principal (predict.fa and predict.principal, both called from predict.psych) \item Added lavaan.diagram to draw the output from a cfa or sem from the lavaan package. \item Added a function to calculate Mardia's test for multivariate normality \item Added lengends and improved the examples for pairs.panels.by \item Removed the "short" option in score.items \item Added the ability to score scales with greatly missing data (for SAPA). Modified the impute option to include = "none" \item Added the term KR20 and KR21 to documentation of alpha \item Modified pairs.panels so that with the cor option it can draw regressions and give correlations. \item Modified the documentation for error.crosses to give a better example. \item Added the ability to read rownumbers (variable names?) in first column of read.clipboard.lower and read.clipboard.upper \item Tweaked the zlim default for cor.plot to be c(-1,1) rather than c(0,1) \item Minor adjustments to Yule and phi.demo to call tetrachoric instead of polycor, thus making the package work with fewer dependencies. \item Cleaned up the fa summary output } } \subsection{Bugs Fixed }{ \itemize{ \item Fixed dia.curved.arrow to draw one headed curved arrows (as documented) \item Fixed a problem with fa.print option sort -- Although giving the correct communalities and uniquenesses in the unsorted option, when sorted, it was not sorting h2 or u2. } } } \section{Changes in psych version 1.0.92 (2010-09-22)}{ \subsection{Additions}{ \itemize{ \item Added omega total from sem to the omegaSem function. } } \subsection{Bugs Fixed }{ \itemize{ \item error.bars.by was ignoring the user supplied ylim. (reported by Helena Chui) Fixed. \item Was drawing incorrect sl solution in omega.graph (reported by Mark Difford).fixed. \item omega.Rd and schmid.Rd incorrectly referred to fm=ml as fm=mle. Fixed. } } } \section{Changes in psych version 1.0.91 (2010-09-15)}{ \subsection{Additions}{ \itemize{ \item Added iclust and vss as an alternative names to ICLUST and vss to be consistent with most naming conventions. \item Added the simple utility scrub to do data cleaning. \item Added biserial and polyserial to do biserial/polyserial correlations (not to be confused with point biserial) \item Modified score.multiple.choice so that it handles missing data without using imputation \item Changed the bfi data set to include more subjects and to correctly define the Openness scale. \item Modified score.multiple.choice and score.items to report response frequencies for each item (frequencies based upon number of non-missing responses). \item Added the burt data set of 11 emotional variables (from Burt, 1915) \item Added draw.tetra to draw tetrachoric correlations for fixed tau values (teaching demo) \item Added plot.irt and plot.poly to the plot.psych function. These have options to draw the item characteristic function, the item information function, and the test information. \item Added irt.fa to do factor analysis of dichotomous or discrete items by first forming a tetrachoric or polychoric correlation matrix. Then convert the results to IRT form. \item Added tetrachoric and polychoric functions to find tetrachoric/polychoric correlations as well as item discrimination parameters. \item Added omegaSem and omegaFromSem to calculate omega from a confirmatory factor model using John Fox's sem package. \item Added (with permission) Bond's Logical Operations Test (BLOT) data set. } } \subsection{Bugs Fixed}{ \itemize{ \item sim.irt was not properly handling vectors of item discrimination or guessing parameters. Fixed. \item Minor problem in fa.stats that would lead to an error message if chisquare was NaN. \item Minor bug in reverse.code for case of 1 variable (why one would want to use 1 the function for one variable is unclear.) \item tetrachoric would not apply the correction for continuity when given a table input but would when given normal data. Fixed. } } } \section{Changes in psych version 1.0.90 (2010-07-07)}{ \subsection{Additions}{ \itemize{ \item Added a few parameters to fa.parallel to a) set the y label and b) to show or not show a legend. Useful for multipanel displays. \item Minor improvements to fa.diagram to allow for input of factor loadings and intercorrelations from other functions. \item Added sim.omega to simulate multifactorial hierarchical structures as a test (and demonstration) of omega. \item Minor changes to the parameter call list for sim.minor to be compatible with sim.omega. } } \subsection{Bugs Fixed}{ \itemize{ \item Fixed a problem in ICLUST where it would sometimes not stop at the right number of clusters for n.clus = 1 \item Fixed problem in omega.diagram where it would sometimes use the incorrect line type for hierarchical (non-Schmid-Leiman) diagrams. \item Fixed bug in schmid where it would not rotate exactly orthogonal factors and find exactly omega = 0. \item Added a warning message that oblique rotation does not work, and does a cluster based rotation instead in the case of exactly orthogonal factors. This is just a problem for artificial data sets. \item History (Both of these are probably due to a problem in GPArotation which has since been fixed.) } } } \section{Changes in psych version 1.0.89 (2010-06-21)}{ \subsection{Additions}{ \itemize{ \item Rearranged the order of parameters for fa so that it is easier to call. \item Added various sem stats to fa output (e.g. RMSEA, BIC, rms, crms). These are also provided for omega output. \item Add fit statistics to omega for g solution only. \item Added a measure of general factor adequacy in Omega. This is just the percent of common variance that is general factor variance for each item. High values and low variances suggest a more meaningful estimate than lower average values or higher variances. \item Various clean up of output of ICLUST and iclust.diagram. Added color option for positive and negative loadings. \item Changed ICC to work on complete data (i.e., using na.omit) as the default (following a question) by Ross Culloch \item Changed pairs.panels to allow for color choice of histogram (requested by Elaine Kuo) \item Changed describe to handle numeric but annotated vectors (suggested by Soeren Vogel) \item Clarified documentation of cohen.kappa and allowed for using explicitly non-numerical categories (as suggested by Peter Ehlers and Scot McNary) \item Increased the usefulness of error.bars to allow for input of x values, y values, and confidence interval. Also added the ability to use 1 standard deviation. \item Minor change to print.psych.fa to round before using formatting. \item Removed the cut <- min(cut,max(abs(load))/2) following a request by Reinhold Hatzinger to be consisent with factanal \item Added an option to omega to allow for analysis of data sets from just the factor loadings and correlation matrices. \item Add a scree function to replace VSS.scree } } \subsection{Bugs Fixed}{ \itemize{ \item Fixed a bug in pairs.panels for the case of not smoothing but with data ellipses (reported by Hirayuki Sato) \item Fixed a serious bug in omega for the case of non-positive manifold items (e.g., personality items). The sign of the group factors was not correctly being set which would lead to incorrect estimates of the multiple R2. \item Fixed bug in ICLUST.diagram where it would draw on top of other figures in rare cases. \item Fix bug in ICLUST for the path coefficients -- in the case of negative loadings, some of these were much too small. \item Correctly color the paths in omega.diagram. \item Fixed print.psych.fa to correctly showing item numbers for sorted output. \item Corrected tetrachor so it would not inappropriately claim that some cells had no data (this did not affect the correlations, just gave an inappropriate warning). } } } \section{Changes in psych version 1.0.88 (2010-04-24)}{ \subsection{Additions}{ \itemize{ \item Added within option for error.bars and error.bars.by to allow for within subject error estimates. \item Reformatted this file (NEWS) so that the command (e.g.,) news(query=Version > "1.0.85",package="psych") provides nice information. \item Added more detail to the overview vignette to discuss how to find omega. \item Added the block.random function to block randomize for experimental designs. } } \subsection{Bugs Fixed}{ \itemize{ \item Corrected schmid (and thus, omega) so that it finds orthogonal factors before rotating them obliquely. This corrected an error introduced in version 1.0.86 when the default rotation for fa was switched to be oblimin. \item correct=FALSE option in tetrachor actually works } } } \section{Changes in psych version 1.0.87 (2010-04-04)}{ \subsection{Bugs Fixed}{ \itemize{ \item error.bars (and error.bars.by) would not properly draw < 3 variables } } } \section{Changes in psych version 1.0.86 (2010-03-26)}{ \subsection{Additions}{ \itemize{ \item Added logit, logistic and logistic.grm functions to help teach about IRT. \item Added the ability to factor analyze covariance matrices (with resulting loadings of covariances rather than correlations) (suggested by Andreas Moeltner) \item Added two new functions to estimate the greatest lower bound (glb.algebraic, written by Andreas Moeltner) and glb.fa. \item Added read.clipboard.fwf to allow reading of fixed width files from clipboard. (Originally this required adding a patch to the read.fwf function but that was subsequentally fixed.) \item Minor adjustment to ICLUST.cluster so that it will produce fewer clusters (if desired) than it thinks are appropriate. That is, if n.clus is less than the appropriate solution, a warning is issued, but n.clus clusters are found. \item Changed the default rotation in fa to be oblimin. (Subsequently discovered that this introduced a bug into schmid and therefore omega.) \item Added the glb.communality function to calculate the greatest lower bound by using factor communality estimates (based upon correspondece with Andreas Moeltner, although his algorithm (glb.algebraic, which is now included) which uses the Rcsdp provides slightly different estimates). \item added names to communalities in principal (requested by Gumundur Arnkelsson). \item Minor change to headtail \item Added the sim.rasch function to simulate 1 parameter IRT Rasch models. \item Added the data sets bock and income. bock is the source of the lsat data,income is US family income from 2008. \item Seriously modified the estimation of Cohen's Kappa in the wkappa function and added a new function: cohen.kappa to calculate kappa from raw data or from similarity matrices. Also calculates the variances following Fleiss, Cohen, and Blashfield, and then find the confidence limits. \item Modified the sim.minor function to simulate correlation matrices with nfact major factors and nvar/2 minor factors with larger number of variables. } } \subsection{Bugs Fixed}{ \itemize{ \item Fixed print.psych.fa to give correct communalities for sorted factors (had been not sorting the communalities, although it had the factor loadings). \item Minor change to comorbidity to clean up output (using print.psych) \item Fixed guttman to correctly estimate lambda 4 (error pointed out by Andreas Moeltner). } } } \section{Changes in psych version 1.0.85 (2009-12-20)}{ \subsection{Additions}{ \itemize{ \item Modified the output from fa.parallel to print summary statistics. Also added an option to use smcs as communality estimates. \item Added the sim.minor function to generate major and minor factor structures. \item Modified fa to use first derivatives based upon the model rather than just empirically derived. This results in an impressive speed improvement. \item Continued to improve the dia.x functions. Labeled the entire set of functions diagram (for easier search). } } } \section{Changes in psych version 1.0.84 (2009-11-30)}{ \subsection{Additions}{ \itemize{ \item Improved documentation for the dia functions to show the power of structure diagrams \item Improved structure.diagram to allow graphs to go left to right (default) or bottom to top (which means causal arrows go left to right) \item Added the affect data set which includes data from two experimental studies of affect. \item Added some features from pairs to allow group coloring in pairs.panels \item Modified factor.parallel to allow for other factoring models (including minres) \item Modified fa to give eigenvalues of the correlation matrix, and of the common factor space. \item Modified pairs.panels so it can plot even if the correlations are NA \item Added a legend and improved the output of cor.plot. \item Having learned how to use strwidth, have cleaned up the dia.rect, fa.diagram, structure.diagram, and iclust.diagram functions. \item Changed the way fa.parallel handles correlation matrices if the number of subjects is not specified. Rather than issuing a warning and proceeding as if n.obs = nvars, it now assumes n.obs =100. \item Improved documentation for fa to explain minimum correlation of factor score estimates. } } \subsection{Bugs Fixed}{ \itemize{ \item Fixed a problem with printing of the principal (components) results \item Fixed bug in the print.psych.vss function that returned the wrong value (although identifying the correct number) for the Velicer MAP criterion. (Reported by Maike Luhmann). } } } \section{Changes in psych version 1.0.83 (2009-10-26)}{ \subsection{Additions}{ \itemize{ \item Added Suggests Rgraphviz to DESCRIPTION file to get around CRAN test for 2.10 (as I should have done according to documentation for packages). Removed all tests of functions which require RGraphviz for 2.9.2 because Rgraphviz does not work on SnowLeopard. } } } \section{Changes in psych version 1.0.82 (2009-10-26)}{ \subsection{Additions}{ \itemize{ \item Improved the output for mat.regress to allow print() and summary(). \item Corrected problem in describe.by to properly identify the matrix output } } \subsection{Bugs Fixed}{ \itemize{ \item Corrected bug to fa for case of oblique and negatively correlated factors (reported by Erich Studerus). \item Minor improvements to the dia.x functions \item Fixed the formatting of the news file to follow the appropriate format \item Removed the old supplementary manual from the docs folder to make for a cleaner installation } } } \section{Changes in psych version 1.0.81 (2009-10-04)}{ \subsection{Additions}{ \itemize{ \item Improved the documentation for the dia.X functions and made some of the calls simpler. } } } \section{Changes in psych version 1.0.80 (2009-09-30)}{ \subsection{Additions}{ \itemize{ \item Added the ability to draw "diagrams" to replace the functions that required Rgraphviz. These are path diagrams with rectangles for observed variables and ellipses for latent variables. A set of graphic functions for drawing diagrams includes dia.ellipse, dia.rect, dia.arrow, dia.curve and dia.self. This allows the function omega and ICLUST to draw structural diagrams without using Rgraphviz. \item The documentation for these new functions is a bit less than desirable and the dia primitives will be modified slightly for the next release. } } } \section{Changes in psych version 1.0.79 (2009-08-25)}{ \subsection{Additions}{ \itemize{ \item added the Schmid data set which includes the original Schmid-Leiman correlation matrix as well as data from Chen, West, and Sousa (2006). \item modified geometric.mean and harmonic.mean to allow for missing data. \item Cleaned up describe.by so that it can handle matrix output of multiple grouping variables and then print correctly. } } } \section{Changes in psych version 1.0.78 (2009-07-28)}{ \subsection{Additions}{ \itemize{ \item Modified factor.stats to allow for statistics even if the matrix is very singular (i.e., N var >> n.obs). } } \subsection{Bugs Fixed}{ \itemize{ \item Corrected bug to ICLUST introduced in version 74 that can, in some conditions, produce negative betas. } } } \section{Changes in psych version 1.0.77(2009-07-21)}{ \subsection{Bugs Fixed}{ \itemize{ \item Fixed an error in printing communalities for rotated factor solutions reported by Tamaki Hattori. Also resolved a problem of extra lines appearing in error.bars. Minor revision to polar. } } } \section{Changes in psych version 1.0.76 (2009-07-14)}{ \subsection{Bugs Fixed}{ \itemize{ \item Finally resolved the problem of not passing the CRAN tests for Mac (even though developed on a Mac, the test package assumes no other packages). } } } \section{Changes in psych version 1.0.75 (2009-07-01)}{ \subsection{Additions}{ \itemize{ \item Reinstituted reporting communalities and uniquenesses for principal components and for factor analysis. This feature had been dropped sometime in the past and reported missing by Ista Zahn. \item Added two new data sets: Harman.Holzinger and Harman.Burt. The former is used by Peter Bentler as an example of alternative methods for estimating reliability, the latter is a nice example of what happens if the correlation matrix is singular. smc estimates are inappropriate in this case and need to be checked. } } } \section{Changes in psych version 1.0.74 (2009-06-25)}{ \subsection{Additions}{ \itemize{ \item Added a weighting option to ICLUST. Weighted beta finds the average between cluster correlation and takes the ratio of the n ^2 * average between cluster correlation to the total variance of the pooled clusters as a measure of the general factor. That is, Unweighted beta calculates beta based upon the correlation between two clusters, corrected for test length using the Spearman-Brown prophecy formala, while weighted beta finds the average interitem correlation between the items within two clusters and then finds beta from this. That is, for two clusters A and B of size N and M with between average correlation rb, weighted beta is (N+M)^2 rb/(Va +Vb + 2Cab). Raw (unweighted) beta is 2rab/(1+rab) where rab = Cab/sqrt(VaVb). Weighted beta seems a more appropriate estimate and is now the default. Unweighted beta is still available for consistency with prior versions. (Unfortunately, in doing this, a bug was introduced that could produce negative betas. Fixed in Version 78). \item Modified the fa function to include maximimum likelihood factor analysis. } } } \section{Changes in psych version 1.0.73 (2009-06-08)}{ \subsection{Additions}{ \itemize{ \item Added gls fit option to fa \item Added mat.sort to sort correlation matrices by factor loadings. \item Revised the overview vignette to make more readable. \item Fixed (I hope) the documentation error that has been preventing being loaded onto CRAN for Mac. } } } \section{Changes in psych version 1.0.72 (2009-06-02)}{ \subsection{Additions}{ \itemize{ \item Added factor stats to omega } } \subsection{Bugs Fixed}{ \itemize{ \item Minor fix to print.psych.omega to correct printing error introduced in .71 } } } \section{Changes in psych version 1.0.71 (2009-06-01)}{ \subsection{Additions}{ \itemize{ \item Combined principal axis, unweighted least squares (minres) and weighted least squares into one combined function (fa). Eventually the three other functions (factor.pa,factor.wls, and factor.minres) will be phased out. Added more rotation and transformation options (basically by making calls to the GPArotation package). \item factor.stats now reports the factor weights matrix for factor scores. \item A new factor analysis function factor.wls will give weighted least squares (wls). \item Omega will now give an estimate for 1 factor and a message about how there are alternatives for 2 factors. } } \subsection{Bugs Fixed}{ \itemize{ \item Fixed a bug in print.psych.omega so that it will now report the item numbers correctly when sorting. \item Fixed call in phi.demo so that it checks if the polycor package is installed. This was causing problems in the Mac version on CRAN. } } } \section{Changes in psych version 1.0.70 (2009-05-25)}{ \subsection{Additions}{ \itemize{ \item Added factor.stats function to report a variety of statistics for factors or components. In particular, added measures of factorial adequacy. factor.stats is now called by the factor.pa, factor.minres, and principal, so it is easier to adjust all three of these functions. \item Added a new rotation option to target.rot -- if keys are not specified, it will rotate to a cluster solution. \item minor tweak to sim.hierarchical to make it consistent with the Jensen and Weng paper \item added to omega to calculate omege_infinity \item corrected bug in principal for case of singular matrices (reported by Gudmundur Arnkelsson) \item corrected bug in print.psych so that it now sorts by factor loadings correctly } } } \section{Changes in psych version 1.0.69 (2009-04-25)}{ \subsection{Additions}{ \itemize{ \item added cor.plot to graphically display correlation structures \item modified cosinor to do regression as well as optimization for fits \item minor correction to ICLUST to catch non-invertible matrices } } } \section{Changes in psych version 1.0.68 (2009-04-15)}{ \subsection{Additions}{ \itemize{ \item Added method="minres" to do minimal residual (ULS) factor analysis to the factor.pa function. \item Added the target.rot function to do targeted rotation. \item Added the sim.anova function to simulate 3 way ANOVA or linear models. \item Minor cleanups of code in other functions } } } \section{Changes in psych version 1.0.67 (2009-03-15)}{ \subsection{Additions}{ \itemize{ \item Have included the vignettes in the package \item Serious modifications to mat.regress to allow for raw data and covariances to be used. \item Added corrections for item overlap to use estimated communalities of items to alpha, cluster.cor, etc. \item Added estimates of G6+ (Modified Guttman 6) reliabililty to alpha, score.items } } } \section{Changes in psych version 1.0.66 (2009-02-15)}{ \subsection{Additions}{ \itemize{ \item Introduced two package vignettes (overview and psych_for_sem) \item minor modification to ICLUST with respect to labeling \item addition to factor.congruence to allow for comparing omega solutions with factor solutions \item modified describe to allow for descriptions of categorical variables. } } } \section{Changes in psych version 1.0.64 (2009-02-01)}{ \subsection{Additions}{ \itemize{ \item Added the function alpha to find Cronbach's alpha and Guttman's Lambda 6 for total scales as well as with single item deleted. \item Minor enhancements to the structure.graph and structure.sem functions. Further refinements to the psych_for_sem.pdf tutorial. } } } \section{Changes in psych version 1.0.63 (2009-01-15)}{ \subsection{Additions}{ \itemize{ \item Added structure.graph and structure.sem to create graphical output and pre-sem commands for sem. \item Modified omega.graph to return sem commands for running the sem function from John Fox. \item Added Promax to report factor intercorrelations for promax rotations. \item bug fix in VSS and omega to allow for promax rotations. \item correction to winsor so that it does not sort the vectors of the input data.frame. \item bug fix in principal (for problem of oblique rotations) \item continued improvements in the print.psych and summary.psych functions \item cleaned up sim.congeneric to reduce the amount of output (now handled through print.psych) \item added super.matrix function (to combine two matrices -- useful for constructing keys and for doing examples in structure.graph) \item Improvements to structure graph. } } } \section{Changes in psych version 1.0.62 (2008-12-31)}{ \subsection{Additions}{ \itemize{ \item added the structure.graph function \item added circadian functions \item renamed some functions to make the index order easier to follow. } } } \section{Changes in psych version 1.0.58 (2008-09-15)}{ \subsection{Additions}{ \itemize{ \item Introduced the guttman function to estimate the 6 Guttman (1945) estimates (as well as a few others). } } } \section{Changes in psych version 1.0.54 (2008-08-15)}{ \subsection{Additions}{ \itemize{ \item Added print.psych and summary.psych generic functions to clean up the output of many of the functions. } } } \section{Changes in psych version 1.0.50 (2008-06-30)}{ \subsection{Additions}{ \itemize{ \item Introduced several new functions: \item headtail (combines head and tail functions to produce the first N and last M rows of a matrix or dataframe) \item error.bars and \item error.bars.by plot means with error bars by a grouping variable \item omega revised to allow for negatively keyed items. Detects items that should be reversed. } } } \section{Changes in psych version 1.0.42 (2008-03-24)}{ \subsection{Additions}{ \itemize{ \item A relatively stable release. \item Passed the tests for R 2.6.2 } } } \section{Changes in psych version 1.0.40 (2008-02-15)}{ \subsection{Additions}{ \itemize{ \item score.multiple.choice provides scores and item statistics for multiple choice items. \item circ.sim modified to be more general item simulations \item circ.simulation \item item.sim \item polar converts two dimensional factor loadings to polar coordinates. } } } \section{Changes in psych version 1.0.33 (2007-10-11)}{ \subsection{Additions}{ \itemize{ \item A relatively stable release. } } } \section{Changes in psych version 1.0.30 (2007-09-09)}{ \subsection{Additions}{ \itemize{ \item A relatively stable release. } } } \section{Changes in psych version 1.0.27 (2007-07-31)}{ \subsection{Additions}{ \itemize{ \item Another upgrade. \item Passed tests for R 2.5 } } } \section{Changes in psych version 1.0.17 (2007-05-06)}{ \subsection{Additions}{ \itemize{ \item The first release to CRAN. Prior versions had been tested at NU. \item Included pairs.panels, describe, read.clipboard, iclust, vss, principal. } } } \section{Changes in psych version 1.0-14 (2006-09-11)}{ \subsection{Additions}{ \itemize{ \item Another release } } } \section{Changes in psych version 1.0-9 (2006-08-26)}{ \subsection{Additions}{ \itemize{ \item Another release } } } \section{Changes in psych version 1.0-1 (2005-05-19)}{ \subsection{Additions}{ \itemize{ \item The first package version on the personality-project.org repository. \item included some of the functions previously in the "useful R" source file. \item worked with R 2.1.0 } } }